#!/usr/bin/env python3
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
    used by pprof.

  Example:
    ./app_profiler.py
    ./pprof_proto_generator.py
    pprof -text pprof.profile
"""

import logging
import os
import os.path
import re
import sys

from simpleperf_report_lib import ReportLib
from simpleperf_utils import (Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir,
                              flatten_arg_list, log_exit, ReadElf, ToolFinder)
try:
    import profile_pb2
except ImportError:
    log_exit('google.protobuf module is missing. Please install it first.')


# Some units of common event names
EVENT_UNITS = {
    'cpu-clock': 'nanoseconds',
    'cpu-cycles': 'cpu-cycles',
    'instructions': 'instructions',
    'task-clock': 'nanoseconds',
}


def load_pprof_profile(filename):
    """Read a serialized pprof profile from `filename` and return the parsed Profile."""
    profile = profile_pb2.Profile()
    with open(filename, "rb") as f:
        profile.ParseFromString(f.read())
    return profile


def store_pprof_profile(filename, profile):
    """Serialize `profile` and write it to `filename`, replacing any existing file."""
    with open(filename, 'wb') as f:
        f.write(profile.SerializeToString())


class PprofProfilePrinter(object):
    """Print the content of a pprof Profile message to stdout as text."""

    def __init__(self, profile):
        self.profile = profile
        self.string_table = profile.string_table
        # Resolve references by their id field instead of by list position, so that
        # profiles whose ids are not the sequence 1..N are still printed correctly.
        self._location_by_id = {location.id: location for location in profile.location}
        self._mapping_by_id = {mapping.id: mapping for mapping in profile.mapping}
        self._function_by_id = {function.id: function for function in profile.function}

    def show(self):
        """Print every section of the profile."""
        p = self.profile
        sub_space = ' '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, string in enumerate(p.string_table):
            print('string[%d]: %s' % (i, string))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print a ValueType as both its string ids and the strings they refer to."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and labels of one sample."""
        sub_space = space + ' '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i, label in enumerate(sample.label):
            print('%slabel[%d] = %s:%s' % (space, i, self.string(label.key),
                                           self.string(label.str)))

    def show_location_id(self, location_id, space=''):
        """Print the location with id `location_id`; print nothing if there is none."""
        location = self._location_by_id.get(location_id)
        if location is not None:
            self.show_location(location, space)

    def show_location(self, location, space=''):
        """Print one location, followed by its mapping and its lines."""
        sub_space = space + ' '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping with id `mapping_id`; print nothing if there is none."""
        mapping = self._mapping_by_id.get(mapping_id)
        if mapping is not None:
            self.show_mapping(mapping, space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of one mapping."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print one line entry, followed by the function it refers to."""
        sub_space = space + ' '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function with id `function_id`; print nothing if there is none."""
        function = self._function_by_id.get(function_id)
        if function is not None:
            self.show_function(function, space)

    def show_function(self, function, space=''):
        """Print all fields of one function."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def string(self, string_id):
        """Return the string stored at index `string_id` of the profile's string table."""
        return self.string_table[string_id]


class Label(object):
    """A key/value string label attached to a sample, stored as string table ids."""

    def __init__(self, key_id: int, str_id: int):
        # See profile.Label.key
        self.key_id = key_id
        # See profile.Label.str
        self.str_id = str_id


class Sample(object):
    """A call stack (list of location ids) with per-sample-type values and labels."""

    def __init__(self):
        self.location_ids = []
        # Map from sample_type_id to accumulated value.
        self.values = {}
        self.labels = []

    def add_location_id(self, location_id):
        self.location_ids.append(location_id)

    def add_value(self, sample_type_id, value):
        """Add `value` to the running total kept for `sample_type_id`."""
        self.values[sample_type_id] = self.values.get(sample_type_id, 0) + value

    def add_values(self, values):
        """Add every (sample_type_id, value) pair of the `values` dict to this sample."""
        for sample_type_id, value in values.items():
            self.add_value(sample_type_id, value)

    @property
    def key(self):
        """Aggregation key: samples are merged only if both stack and labels match.

        Labels are part of the key because a merged sample keeps the labels of the
        first sample only; merging across different labels would attribute the
        values to the wrong thread/pid/tid.
        """
        label_key = tuple((label.key_id, label.str_id) for label in self.labels)
        return (tuple(self.location_ids), label_key)


class Location(object):
    """An instruction address inside a mapping, with optional line info."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.id = -1  # unset
        self.mapping_id = mapping_id
        self.address = address
        self.vaddr_in_dso = vaddr_in_dso
        self.lines = []

    @property
    def key(self):
        return (self.mapping_id, self.address)


class Line(object):
    """A (function, line number) pair belonging to a Location."""

    def __init__(self):
        self.function_id = 0
        self.line = 0


class Mapping(object):
    """A memory range mapped from a file, identified by string table ids."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.id = -1  # unset
        self.memory_start = start
        self.memory_limit = end
        self.file_offset = pgoff
        self.filename_id = filename_id
        self.build_id_id = build_id_id

    @property
    def key(self):
        return (
            self.memory_start,
            self.memory_limit,
            self.file_offset,
            self.filename_id,
            self.build_id_id)


class Function(object):
    """A function, identified by its name and the dso it belongs to."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.id = -1  # unset
        self.name_id = name_id
        self.dso_name_id = dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        self.source_filename_id = 0
        self.start_line = 0

    @property
    def key(self):
        return (self.name_id, self.dso_name_id)


# pylint: disable=no-member
class PprofProfileGenerator(object):
    """Aggregate samples of one or more record files into a pprof Profile message.

    Usage: call load_record_file() once per record file, then gen() to get the
    profile.
    """

    def __init__(self, config):
        """Set up empty tables.

        Reads 'dso_filters', 'max_chain_length' and 'ndk_path' from `config`, and
        sets config['binary_cache_dir'] to 'binary_cache' if that directory exists,
        otherwise to None.
        """
        self.config = config
        self.lib = None

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
        self.max_chain_length = config['max_chain_length']
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is the empty string, see get_string_id().
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])
        self.binary_finder = BinaryFinder(config['binary_cache_dir'], self.read_elf)

    def load_record_file(self, record_file):
        """Read all samples of `record_file` and aggregate them into this generator.

        The report library instance is closed when this method returns, also when
        reading fails with an exception.
        """
        self.lib = ReportLib()
        try:
            self._load_samples(record_file)
        finally:
            self.lib.Close()
            self.lib = None

    def _load_samples(self, record_file):
        """Configure self.lib for `record_file` and process every sample in it."""
        lib = self.lib
        lib.SetRecordFile(record_file)

        binary_cache_dir = self.config['binary_cache_dir']
        if binary_cache_dir:
            lib.SetSymfs(binary_cache_dir)
            kallsyms = os.path.join(binary_cache_dir, 'kallsyms')
            if os.path.isfile(kallsyms):
                lib.SetKallsymsFile(kallsyms)

        if self.config.get('show_art_frames'):
            lib.ShowArtFrames()
        lib.SetReportOptions(self.config['report_lib_options'])

        comments = [
            "Simpleperf Record Command:\n" + lib.GetRecordCmd(),
            "Converted to pprof with:\n" + " ".join(sys.argv),
            "Architecture:\n" + lib.GetArch(),
        ]
        for comment in comments:
            self.profile.comment.append(self.get_string_id(comment))

        numbers_re = re.compile(r"\d+")

        # Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = lib.GetNextSample()
            if report_sample is None:
                break
            event = lib.GetEventOfCurrentSample()
            symbol = lib.GetSymbolOfCurrentSample()
            callchain = lib.GetCallChainOfCurrentSample()

            # Each event owns two consecutive sample types: sample count and period.
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            sample.labels.append(Label(
                self.get_string_id("thread"),
                self.get_string_id(report_sample.thread_comm)))
            # Heuristic: threadpools doing similar work are often named as
            # name-1, name-2, name-3. Combine threadpools into one label
            # "name-%d" if they only differ by a number.
            sample.labels.append(Label(
                self.get_string_id("threadpool"),
                self.get_string_id(
                    numbers_re.sub("%d", report_sample.thread_comm))))
            sample.labels.append(Label(
                self.get_string_id("pid"),
                self.get_string_id(str(report_sample.pid))))
            sample.labels.append(Label(
                self.get_string_id("tid"),
                self.get_string_id(str(report_sample.tid))))

            # The dso filter is applied to the sampled (leaf) symbol only: a sample
            # is either kept with its call chain or dropped as a whole.
            if not self._filter_symbol(symbol):
                continue
            sample.add_location_id(self.get_location_id(report_sample.ip, symbol))
            # When the chain is longer than max_chain_length, only its last
            # max_chain_length entries are converted.
            first_entry = max(0, callchain.nr - self.max_chain_length)
            for i in range(first_entry, callchain.nr):
                entry = callchain.entries[i]
                sample.add_location_id(self.get_location_id(entry.ip, entry.symbol))
            self.add_sample(sample)

    def gen(self, jobs: int):
        """Fill self.profile from the aggregated data and return it.

        `jobs` is passed on to gen_source_lines().
        """
        # 1. Generate line info for locations and functions.
        self.gen_source_lines(jobs)

        # 2. Produce samples/locations/functions in profile.
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_symbol(self, symbol):
        """Return True if no dso filter is set or symbol.dso_name is in the filter."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str_value):
        """Return the string table index of `str_value`, adding it if it is new.

        Empty values map to index 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # self.string_table doesn't contain the empty string stored at index 0.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index `str_id` of the profile's string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the index of the first of the two sample types of event `name`.

        On first use of an event, two sample types are added to the profile:
        '<name>_samples' in unit 'samples' at the returned index, and '<name>' in
        the unit given by EVENT_UNITS (default 'count') at the returned index + 1.
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id(name + '_samples')
        sample_type.unit = self.get_string_id('samples')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id(name)
        units = EVENT_UNITS.get(name, 'count')
        sample_type.unit = self.get_string_id(units)
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the Location for `ip` in the mapping of `symbol`.

        A new Location is created, together with its mapping and function if
        needed, when no Location with the same (mapping_id, address) exists.
        """
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path, build_id)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path, symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the id of the Mapping equal to `report_mapping`, creating it if new."""
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name. """
        value = self.binary_map.get(dso_name)
        if value:
            return value

        binary_path = dso_name
        build_id = self.lib.GetBuildIdForPath(dso_name)
        # Try elf_path in binary cache.
        elf_path = self.binary_finder.find_binary(dso_name, build_id)
        if elf_path:
            binary_path = str(elf_path)

        # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
        build_id = ReadElf.unpad_build_id(build_id)
        self.binary_map[dso_name] = (binary_path, build_id)
        return (binary_path, build_id)

    def get_mapping(self, mapping_id):
        """Return the Mapping with id `mapping_id`, or None if the id isn't positive."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of the Function (name, dso_name), creating it if it is new.

        Returns 0 for the name 'unknown'.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with id `function_id`, or None if the id isn't positive."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into the sample with the same key, or store it if it is new."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self, jobs: int):
        """Add source file and line info to locations and functions.

        Does nothing but log a message when binary_cache or llvm-symbolizer is
        missing. `jobs` is passed on to Addr2Nearestline.convert_addrs_to_lines().
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            logging.info("Can't generate line information because binary_cache is missing.")
            return
        if not ToolFinder.find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            logging.info("Can't generate line information because can't find llvm-symbolizer.")
            return
        # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
        # pass binary_cache_dir to BinaryFinder.
        binary_finder = BinaryFinder(None, self.read_elf)
        addr2line = Addr2Nearestline(self.config['ndk_path'], binary_finder, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, None, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, None, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines(jobs)

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for i, source in enumerate(sources):
                source_file, source_line, function_name = source
                if i == 0:
                    # Don't override original function name from report library, which is more
                    # accurate when proguard mapping file is given.
                    function_id = location.lines[0].function_id
                    # Clear default line info.
                    location.lines.clear()
                else:
                    function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                location.lines.append(self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Return a new Line for `function_id` at `source_line`.

        Also records `source_file` as the source file of that function.
        """
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile, with one value slot per sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Each event name in self.sample_types owns two sample types.
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for sample_type_id, value in sample.values.items():
            values[sample_type_id] = value
        profile_sample.value.extend(values)

        for sample_label in sample.labels:
            label = profile_sample.label.add()
            label.key = sample_label.key_id
            label.str = sample_label.str_id

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        # Line numbers and inline frames are only reported when binary_cache exists.
        has_binary_cache = bool(self.config.get('binary_cache_dir'))
        profile_mapping.has_line_numbers = has_binary_cache
        profile_mapping.has_inline_frames = has_binary_cache

    def gen_profile_location(self, location):
        """Append `location` and its lines to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for location_line in location.lines:
            line = profile_location.line.add()
            line.function_id = location_line.function_id
            line.line = location_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile; name and system_name are the same string."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line


def main():
    """Parse the command line, then print an existing profile or generate a new one."""
    parser = BaseArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
    parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
                        Set profiling data file to report. Default is perf.data""")
    parser.add_argument('-o', '--output_file', default='pprof.profile', help="""
                        The path of generated pprof profile data.""")
    parser.add_argument('--max_chain_length', type=int, default=1000000000, help="""
                        Maximum depth of samples to be converted.""")  # Large value as infinity standin.
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    parser.add_argument(
        '-j', '--jobs', type=int, default=os.cpu_count(),
        help='Use multithreading to speed up source code annotation.')
    sample_filter_group = parser.add_argument_group('Sample filter options')
    sample_filter_group.add_argument('--dso', nargs='+', action='append', help="""
                                     Use samples only in selected binaries.""")
    parser.add_report_lib_options(sample_filter_group=sample_filter_group)

    args = parser.parse_args()
    if args.show:
        # `--show` without a value is stored as None: fall back to the default file.
        show_file = args.show[0] if args.show[0] else 'pprof.profile'
        profile = load_pprof_profile(show_file)
        printer = PprofProfilePrinter(profile)
        printer.show()
        return

    config = {}
    config['output_file'] = args.output_file
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['ndk_path'] = args.ndk_path
    config['max_chain_length'] = args.max_chain_length
    config['report_lib_options'] = args.report_lib_options
    generator = PprofProfileGenerator(config)
    for record_file in args.record_file:
        generator.load_record_file(record_file)
    profile = generator.gen(args.jobs)
    store_pprof_profile(config['output_file'], profile)
    logging.info("Report is generated at '%s' successfully.", config['output_file'])


if __name__ == '__main__':
    main()