1#!/usr/bin/env python 2# 3# Copyright 2012 the V8 project authors. All rights reserved. 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: 7# 8# * Redistributions of source code must retain the above copyright 9# notice, this list of conditions and the following disclaimer. 10# * Redistributions in binary form must reproduce the above 11# copyright notice, this list of conditions and the following 12# disclaimer in the documentation and/or other materials provided 13# with the distribution. 14# * Neither the name of Google Inc. nor the names of its 15# contributors may be used to endorse or promote products derived 16# from this software without specific prior written permission. 17# 18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30import bisect 31import collections 32import ctypes 33import disasm 34import mmap 35import optparse 36import os 37import re 38import subprocess 39import sys 40import time 41 42 43USAGE="""usage: %prog [OPTION]... 44 45Analyses V8 and perf logs to produce profiles. 46 47Perf logs can be collected using a command like: 48 $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof 49 # -R: collect all data 50 # -e cycles: use cpu-cycles event (run "perf list" for details) 51 # -c 10000: write a sample after each 10000 events 52 # -f: force output file overwrite 53 # -i: limit profiling to our process and the kernel 54 # --ll-prof shell flag enables the right V8 logs 55This will produce a binary trace file (perf.data) that %prog can analyse. 56 57IMPORTANT: 58 The kernel has an internal maximum for events per second, it is 100K by 59 default. That's not enough for "-c 10000". Set it to some higher value: 60 $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate 61 You can also make the warning about kernel address maps go away: 62 $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict 63 64We have a convenience script that handles all of the above for you: 65 $ tools/run-llprof.sh ./d8 bench.js 66 67Examples: 68 # Print flat profile with annotated disassembly for the 10 top 69 # symbols. Use default log names and include the snapshot log. 70 $ %prog --snapshot --disasm-top=10 71 72 # Print flat profile with annotated disassembly for all used symbols. 73 # Use default log names and include kernel symbols into analysis. 74 $ %prog --disasm-all --kernel 75 76 # Print flat profile. Use custom log names. 77 $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot 78""" 79 80 81JS_ORIGIN = "js" 82JS_SNAPSHOT_ORIGIN = "js-snapshot" 83 84class Code(object): 85 """Code object.""" 86 87 _id = 0 88 UNKNOWN = 0 89 V8INTERNAL = 1 90 FULL_CODEGEN = 2 91 OPTIMIZED = 3 92 93 def __init__(self, name, start_address, end_address, origin, origin_offset): 94 self.id = Code._id 95 Code._id += 1 96 self.name = name 97 self.other_names = None 98 self.start_address = start_address 99 self.end_address = end_address 100 self.origin = origin 101 self.origin_offset = origin_offset 102 self.self_ticks = 0 103 self.self_ticks_map = None 104 self.callee_ticks = None 105 if name.startswith("LazyCompile:*"): 106 self.codetype = Code.OPTIMIZED 107 elif name.startswith("LazyCompile:"): 108 self.codetype = Code.FULL_CODEGEN 109 elif name.startswith("v8::internal::"): 110 self.codetype = Code.V8INTERNAL 111 else: 112 self.codetype = Code.UNKNOWN 113 114 def AddName(self, name): 115 assert self.name != name 116 if self.other_names is None: 117 self.other_names = [name] 118 return 119 if not name in self.other_names: 120 self.other_names.append(name) 121 122 def FullName(self): 123 if self.other_names is None: 124 return self.name 125 self.other_names.sort() 126 return "%s (aka %s)" % (self.name, ", ".join(self.other_names)) 127 128 def IsUsed(self): 129 return self.self_ticks > 0 or self.callee_ticks is not None 130 131 def Tick(self, pc): 132 self.self_ticks += 1 133 if self.self_ticks_map is None: 134 self.self_ticks_map = collections.defaultdict(lambda: 0) 135 offset = pc - self.start_address 136 self.self_ticks_map[offset] += 1 137 138 def CalleeTick(self, callee): 139 if self.callee_ticks is None: 140 self.callee_ticks = collections.defaultdict(lambda: 0) 141 self.callee_ticks[callee] += 1 142 143 def PrintAnnotated(self, arch, options): 144 if self.self_ticks_map is None: 145 ticks_map = [] 146 else: 147 ticks_map = self.self_ticks_map.items() 148 # Convert the ticks map to offsets and counts arrays so that later 149 # we can do binary search in the offsets array. 150 ticks_map.sort(key=lambda t: t[0]) 151 ticks_offsets = [t[0] for t in ticks_map] 152 ticks_counts = [t[1] for t in ticks_map] 153 # Get a list of disassembled lines and their addresses. 154 lines = self._GetDisasmLines(arch, options) 155 if len(lines) == 0: 156 return 157 # Print annotated lines. 158 address = lines[0][0] 159 total_count = 0 160 for i in xrange(len(lines)): 161 start_offset = lines[i][0] - address 162 if i == len(lines) - 1: 163 end_offset = self.end_address - self.start_address 164 else: 165 end_offset = lines[i + 1][0] - address 166 # Ticks (reported pc values) are not always precise, i.e. not 167 # necessarily point at instruction starts. So we have to search 168 # for ticks that touch the current instruction line. 169 j = bisect.bisect_left(ticks_offsets, end_offset) 170 count = 0 171 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])): 172 if offset < start_offset: 173 break 174 count += cnt 175 total_count += count 176 count = 100.0 * count / self.self_ticks 177 if count >= 0.01: 178 print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1]) 179 else: 180 print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1]) 181 print 182 assert total_count == self.self_ticks, \ 183 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self) 184 185 def __str__(self): 186 return "%s [0x%x, 0x%x) size: %d origin: %s" % ( 187 self.name, 188 self.start_address, 189 self.end_address, 190 self.end_address - self.start_address, 191 self.origin) 192 193 def _GetDisasmLines(self, arch, options): 194 if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN: 195 inplace = False 196 filename = options.log + ".ll" 197 else: 198 inplace = True 199 filename = self.origin 200 return disasm.GetDisasmLines(filename, 201 self.origin_offset, 202 self.end_address - self.start_address, 203 arch, 204 inplace) 205 206 207class CodePage(object): 208 """Group of adjacent code objects.""" 209 210 SHIFT = 20 # 1M pages 211 SIZE = (1 << SHIFT) 212 MASK = ~(SIZE - 1) 213 214 @staticmethod 215 def PageAddress(address): 216 return address & CodePage.MASK 217 218 @staticmethod 219 def PageId(address): 220 return address >> CodePage.SHIFT 221 222 @staticmethod 223 def PageAddressFromId(id): 224 return id << CodePage.SHIFT 225 226 def __init__(self, address): 227 self.address = address 228 self.code_objects = [] 229 230 def Add(self, code): 231 self.code_objects.append(code) 232 233 def Remove(self, code): 234 self.code_objects.remove(code) 235 236 def Find(self, pc): 237 code_objects = self.code_objects 238 for i, code in enumerate(code_objects): 239 if code.start_address <= pc < code.end_address: 240 code_objects[0], code_objects[i] = code, code_objects[0] 241 return code 242 return None 243 244 def __iter__(self): 245 return self.code_objects.__iter__() 246 247 248class CodeMap(object): 249 """Code object map.""" 250 251 def __init__(self): 252 self.pages = {} 253 self.min_address = 1 << 64 254 self.max_address = -1 255 256 def Add(self, code, max_pages=-1): 257 page_id = CodePage.PageId(code.start_address) 258 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 259 pages = 0 260 while page_id < limit_id: 261 if max_pages >= 0 and pages > max_pages: 262 print >>sys.stderr, \ 263 "Warning: page limit (%d) reached for %s [%s]" % ( 264 max_pages, code.name, code.origin) 265 break 266 if page_id in self.pages: 267 page = self.pages[page_id] 268 else: 269 page = CodePage(CodePage.PageAddressFromId(page_id)) 270 self.pages[page_id] = page 271 page.Add(code) 272 page_id += 1 273 pages += 1 274 self.min_address = min(self.min_address, code.start_address) 275 self.max_address = max(self.max_address, code.end_address) 276 277 def Remove(self, code): 278 page_id = CodePage.PageId(code.start_address) 279 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 280 removed = False 281 while page_id < limit_id: 282 if page_id not in self.pages: 283 page_id += 1 284 continue 285 page = self.pages[page_id] 286 page.Remove(code) 287 removed = True 288 page_id += 1 289 return removed 290 291 def AllCode(self): 292 for page in self.pages.itervalues(): 293 for code in page: 294 if CodePage.PageAddress(code.start_address) == page.address: 295 yield code 296 297 def UsedCode(self): 298 for code in self.AllCode(): 299 if code.IsUsed(): 300 yield code 301 302 def Print(self): 303 for code in self.AllCode(): 304 print code 305 306 def Find(self, pc): 307 if pc < self.min_address or pc >= self.max_address: 308 return None 309 page_id = CodePage.PageId(pc) 310 if page_id not in self.pages: 311 return None 312 return self.pages[page_id].Find(pc) 313 314 315class CodeInfo(object): 316 """Generic info about generated code objects.""" 317 318 def __init__(self, arch, header_size): 319 self.arch = arch 320 self.header_size = header_size 321 322 323class SnapshotLogReader(object): 324 """V8 snapshot log reader.""" 325 326 _SNAPSHOT_CODE_NAME_RE = re.compile( 327 r"snapshot-code-name,(\d+),\"(.*)\"") 328 329 def __init__(self, log_name): 330 self.log_name = log_name 331 332 def ReadNameMap(self): 333 log = open(self.log_name, "r") 334 try: 335 snapshot_pos_to_name = {} 336 for line in log: 337 match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line) 338 if match: 339 pos = int(match.group(1)) 340 name = match.group(2) 341 snapshot_pos_to_name[pos] = name 342 finally: 343 log.close() 344 return snapshot_pos_to_name 345 346 347class LogReader(object): 348 """V8 low-level (binary) log reader.""" 349 350 _ARCH_TO_POINTER_TYPE_MAP = { 351 "ia32": ctypes.c_uint32, 352 "arm": ctypes.c_uint32, 353 "mips": ctypes.c_uint32, 354 "x64": ctypes.c_uint64 355 } 356 357 _CODE_CREATE_TAG = "C" 358 _CODE_MOVE_TAG = "M" 359 _CODE_DELETE_TAG = "D" 360 _SNAPSHOT_POSITION_TAG = "P" 361 _CODE_MOVING_GC_TAG = "G" 362 363 def __init__(self, log_name, code_map, snapshot_pos_to_name): 364 self.log_file = open(log_name, "r") 365 self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE) 366 self.log_pos = 0 367 self.code_map = code_map 368 self.snapshot_pos_to_name = snapshot_pos_to_name 369 self.address_to_snapshot_name = {} 370 371 self.arch = self.log[:self.log.find("\0")] 372 self.log_pos += len(self.arch) + 1 373 assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \ 374 "Unsupported architecture %s" % self.arch 375 pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch] 376 377 self.code_create_struct = LogReader._DefineStruct([ 378 ("name_size", ctypes.c_int32), 379 ("code_address", pointer_type), 380 ("code_size", ctypes.c_int32)]) 381 382 self.code_move_struct = LogReader._DefineStruct([ 383 ("from_address", pointer_type), 384 ("to_address", pointer_type)]) 385 386 self.code_delete_struct = LogReader._DefineStruct([ 387 ("address", pointer_type)]) 388 389 self.snapshot_position_struct = LogReader._DefineStruct([ 390 ("address", pointer_type), 391 ("position", ctypes.c_int32)]) 392 393 def ReadUpToGC(self): 394 while self.log_pos < self.log.size(): 395 tag = self.log[self.log_pos] 396 self.log_pos += 1 397 398 if tag == LogReader._CODE_MOVING_GC_TAG: 399 self.address_to_snapshot_name.clear() 400 return 401 402 if tag == LogReader._CODE_CREATE_TAG: 403 event = self.code_create_struct.from_buffer(self.log, self.log_pos) 404 self.log_pos += ctypes.sizeof(event) 405 start_address = event.code_address 406 end_address = start_address + event.code_size 407 if start_address in self.address_to_snapshot_name: 408 name = self.address_to_snapshot_name[start_address] 409 origin = JS_SNAPSHOT_ORIGIN 410 else: 411 name = self.log[self.log_pos:self.log_pos + event.name_size] 412 origin = JS_ORIGIN 413 self.log_pos += event.name_size 414 origin_offset = self.log_pos 415 self.log_pos += event.code_size 416 code = Code(name, start_address, end_address, origin, origin_offset) 417 conficting_code = self.code_map.Find(start_address) 418 if conficting_code: 419 if not (conficting_code.start_address == code.start_address and 420 conficting_code.end_address == code.end_address): 421 self.code_map.Remove(conficting_code) 422 else: 423 LogReader._HandleCodeConflict(conficting_code, code) 424 # TODO(vitalyr): this warning is too noisy because of our 425 # attempts to reconstruct code log from the snapshot. 426 # print >>sys.stderr, \ 427 # "Warning: Skipping duplicate code log entry %s" % code 428 continue 429 self.code_map.Add(code) 430 continue 431 432 if tag == LogReader._CODE_MOVE_TAG: 433 event = self.code_move_struct.from_buffer(self.log, self.log_pos) 434 self.log_pos += ctypes.sizeof(event) 435 old_start_address = event.from_address 436 new_start_address = event.to_address 437 if old_start_address == new_start_address: 438 # Skip useless code move entries. 439 continue 440 code = self.code_map.Find(old_start_address) 441 if not code: 442 print >>sys.stderr, "Warning: Not found %x" % old_start_address 443 continue 444 assert code.start_address == old_start_address, \ 445 "Inexact move address %x for %s" % (old_start_address, code) 446 self.code_map.Remove(code) 447 size = code.end_address - code.start_address 448 code.start_address = new_start_address 449 code.end_address = new_start_address + size 450 self.code_map.Add(code) 451 continue 452 453 if tag == LogReader._CODE_DELETE_TAG: 454 event = self.code_delete_struct.from_buffer(self.log, self.log_pos) 455 self.log_pos += ctypes.sizeof(event) 456 old_start_address = event.address 457 code = self.code_map.Find(old_start_address) 458 if not code: 459 print >>sys.stderr, "Warning: Not found %x" % old_start_address 460 continue 461 assert code.start_address == old_start_address, \ 462 "Inexact delete address %x for %s" % (old_start_address, code) 463 self.code_map.Remove(code) 464 continue 465 466 if tag == LogReader._SNAPSHOT_POSITION_TAG: 467 event = self.snapshot_position_struct.from_buffer(self.log, 468 self.log_pos) 469 self.log_pos += ctypes.sizeof(event) 470 start_address = event.address 471 snapshot_pos = event.position 472 if snapshot_pos in self.snapshot_pos_to_name: 473 self.address_to_snapshot_name[start_address] = \ 474 self.snapshot_pos_to_name[snapshot_pos] 475 continue 476 477 assert False, "Unknown tag %s" % tag 478 479 def Dispose(self): 480 self.log.close() 481 self.log_file.close() 482 483 @staticmethod 484 def _DefineStruct(fields): 485 class Struct(ctypes.Structure): 486 _fields_ = fields 487 return Struct 488 489 @staticmethod 490 def _HandleCodeConflict(old_code, new_code): 491 assert (old_code.start_address == new_code.start_address and 492 old_code.end_address == new_code.end_address), \ 493 "Conficting code log entries %s and %s" % (old_code, new_code) 494 if old_code.name == new_code.name: 495 return 496 # Code object may be shared by a few functions. Collect the full 497 # set of names. 498 old_code.AddName(new_code.name) 499 500 501class Descriptor(object): 502 """Descriptor of a structure in the binary trace log.""" 503 504 CTYPE_MAP = { 505 "u16": ctypes.c_uint16, 506 "u32": ctypes.c_uint32, 507 "u64": ctypes.c_uint64 508 } 509 510 def __init__(self, fields): 511 class TraceItem(ctypes.Structure): 512 _fields_ = Descriptor.CtypesFields(fields) 513 514 def __str__(self): 515 return ", ".join("%s: %s" % (field, self.__getattribute__(field)) 516 for field, _ in TraceItem._fields_) 517 518 self.ctype = TraceItem 519 520 def Read(self, trace, offset): 521 return self.ctype.from_buffer(trace, offset) 522 523 @staticmethod 524 def CtypesFields(fields): 525 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields] 526 527 528# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf 529# for the gory details. 530 531 532# Reference: struct perf_file_header in kernel/tools/perf/util/header.h 533TRACE_HEADER_DESC = Descriptor([ 534 ("magic", "u64"), 535 ("size", "u64"), 536 ("attr_size", "u64"), 537 ("attrs_offset", "u64"), 538 ("attrs_size", "u64"), 539 ("data_offset", "u64"), 540 ("data_size", "u64"), 541 ("event_types_offset", "u64"), 542 ("event_types_size", "u64") 543]) 544 545 546# Reference: /usr/include/linux/perf_event.h 547PERF_EVENT_ATTR_DESC = Descriptor([ 548 ("type", "u32"), 549 ("size", "u32"), 550 ("config", "u64"), 551 ("sample_period_or_freq", "u64"), 552 ("sample_type", "u64"), 553 ("read_format", "u64"), 554 ("flags", "u64"), 555 ("wakeup_events_or_watermark", "u32"), 556 ("bp_type", "u32"), 557 ("bp_addr", "u64"), 558 ("bp_len", "u64") 559]) 560 561 562# Reference: /usr/include/linux/perf_event.h 563PERF_EVENT_HEADER_DESC = Descriptor([ 564 ("type", "u32"), 565 ("misc", "u16"), 566 ("size", "u16") 567]) 568 569 570# Reference: kernel/events/core.c 571PERF_MMAP_EVENT_BODY_DESC = Descriptor([ 572 ("pid", "u32"), 573 ("tid", "u32"), 574 ("addr", "u64"), 575 ("len", "u64"), 576 ("pgoff", "u64") 577]) 578 579 580# perf_event_attr.sample_type bits control the set of 581# perf_sample_event fields. 582PERF_SAMPLE_IP = 1 << 0 583PERF_SAMPLE_TID = 1 << 1 584PERF_SAMPLE_TIME = 1 << 2 585PERF_SAMPLE_ADDR = 1 << 3 586PERF_SAMPLE_READ = 1 << 4 587PERF_SAMPLE_CALLCHAIN = 1 << 5 588PERF_SAMPLE_ID = 1 << 6 589PERF_SAMPLE_CPU = 1 << 7 590PERF_SAMPLE_PERIOD = 1 << 8 591PERF_SAMPLE_STREAM_ID = 1 << 9 592PERF_SAMPLE_RAW = 1 << 10 593 594 595# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE. 596PERF_SAMPLE_EVENT_BODY_FIELDS = [ 597 ("ip", "u64", PERF_SAMPLE_IP), 598 ("pid", "u32", PERF_SAMPLE_TID), 599 ("tid", "u32", PERF_SAMPLE_TID), 600 ("time", "u64", PERF_SAMPLE_TIME), 601 ("addr", "u64", PERF_SAMPLE_ADDR), 602 ("id", "u64", PERF_SAMPLE_ID), 603 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID), 604 ("cpu", "u32", PERF_SAMPLE_CPU), 605 ("res", "u32", PERF_SAMPLE_CPU), 606 ("period", "u64", PERF_SAMPLE_PERIOD), 607 # Don't want to handle read format that comes after the period and 608 # before the callchain and has variable size. 609 ("nr", "u64", PERF_SAMPLE_CALLCHAIN) 610 # Raw data follows the callchain and is ignored. 611] 612 613 614PERF_SAMPLE_EVENT_IP_FORMAT = "u64" 615 616 617PERF_RECORD_MMAP = 1 618PERF_RECORD_SAMPLE = 9 619 620 621class TraceReader(object): 622 """Perf (linux-2.6/tools/perf) trace file reader.""" 623 624 _TRACE_HEADER_MAGIC = 4993446653023372624 625 626 def __init__(self, trace_name): 627 self.trace_file = open(trace_name, "r") 628 self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE) 629 self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0) 630 if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC: 631 print >>sys.stderr, "Warning: unsupported trace header magic" 632 self.offset = self.trace_header.data_offset 633 self.limit = self.trace_header.data_offset + self.trace_header.data_size 634 assert self.limit <= self.trace.size(), \ 635 "Trace data limit exceeds trace file size" 636 self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype) 637 assert self.trace_header.attrs_size != 0, \ 638 "No perf event attributes found in the trace" 639 perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace, 640 self.trace_header.attrs_offset) 641 self.sample_event_body_desc = self._SampleEventBodyDesc( 642 perf_event_attr.sample_type) 643 self.callchain_supported = \ 644 (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 645 if self.callchain_supported: 646 self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT] 647 self.ip_size = ctypes.sizeof(self.ip_struct) 648 649 def ReadEventHeader(self): 650 if self.offset >= self.limit: 651 return None, 0 652 offset = self.offset 653 header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset) 654 self.offset += header.size 655 return header, offset 656 657 def ReadMmap(self, header, offset): 658 mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace, 659 offset + self.header_size) 660 # Read null-terminated filename. 661 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): 662 offset + header.size] 663 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] 664 return mmap_info 665 666 def ReadSample(self, header, offset): 667 sample = self.sample_event_body_desc.Read(self.trace, 668 offset + self.header_size) 669 if not self.callchain_supported: 670 return sample 671 sample.ips = [] 672 offset += self.header_size + ctypes.sizeof(sample) 673 for _ in xrange(sample.nr): 674 sample.ips.append( 675 self.ip_struct.from_buffer(self.trace, offset).value) 676 offset += self.ip_size 677 return sample 678 679 def Dispose(self): 680 self.trace.close() 681 self.trace_file.close() 682 683 def _SampleEventBodyDesc(self, sample_type): 684 assert (sample_type & PERF_SAMPLE_READ) == 0, \ 685 "Can't hande read format in samples" 686 fields = [(field, format) 687 for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS 688 if (bit & sample_type) != 0] 689 return Descriptor(fields) 690 691 692OBJDUMP_SECTION_HEADER_RE = re.compile( 693 r"^\s*\d+\s(\.\S+)\s+[a-f0-9]") 694OBJDUMP_SYMBOL_LINE_RE = re.compile( 695 r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$") 696OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile( 697 r"^DYNAMIC SYMBOL TABLE") 698OBJDUMP_SKIP_RE = re.compile( 699 r"^.*ld\.so\.cache$") 700KERNEL_ALLSYMS_FILE = "/proc/kallsyms" 701PERF_KERNEL_ALLSYMS_RE = re.compile( 702 r".*kallsyms.*") 703KERNEL_ALLSYMS_LINE_RE = re.compile( 704 r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$") 705 706 707class LibraryRepo(object): 708 def __init__(self): 709 self.infos = [] 710 self.names = set() 711 self.ticks = {} 712 713 def Load(self, mmap_info, code_map, options): 714 # Skip kernel mmaps when requested using the fact that their tid 715 # is 0. 716 if mmap_info.tid == 0 and not options.kernel: 717 return True 718 if OBJDUMP_SKIP_RE.match(mmap_info.filename): 719 return True 720 if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename): 721 return self._LoadKernelSymbols(code_map) 722 self.infos.append(mmap_info) 723 mmap_info.ticks = 0 724 mmap_info.unique_name = self._UniqueMmapName(mmap_info) 725 if not os.path.exists(mmap_info.filename): 726 return True 727 # Request section headers (-h), symbols (-t), and dynamic symbols 728 # (-T) from objdump. 729 # Unfortunately, section headers span two lines, so we have to 730 # keep the just seen section name (from the first line in each 731 # section header) in the after_section variable. 732 if mmap_info.filename.endswith(".ko"): 733 dynamic_symbols = "" 734 else: 735 dynamic_symbols = "-T" 736 process = subprocess.Popen( 737 "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename), 738 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 739 pipe = process.stdout 740 after_section = None 741 code_sections = set() 742 reloc_sections = set() 743 dynamic = False 744 try: 745 for line in pipe: 746 if after_section: 747 if line.find("CODE") != -1: 748 code_sections.add(after_section) 749 if line.find("RELOC") != -1: 750 reloc_sections.add(after_section) 751 after_section = None 752 continue 753 754 match = OBJDUMP_SECTION_HEADER_RE.match(line) 755 if match: 756 after_section = match.group(1) 757 continue 758 759 if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line): 760 dynamic = True 761 continue 762 763 match = OBJDUMP_SYMBOL_LINE_RE.match(line) 764 if match: 765 start_address = int(match.group(1), 16) 766 origin_offset = start_address 767 flags = match.group(2) 768 section = match.group(3) 769 if section in code_sections: 770 if dynamic or section in reloc_sections: 771 start_address += mmap_info.addr 772 size = int(match.group(4), 16) 773 name = match.group(5) 774 origin = mmap_info.filename 775 code_map.Add(Code(name, start_address, start_address + size, 776 origin, origin_offset)) 777 finally: 778 pipe.close() 779 assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename 780 781 def Tick(self, pc): 782 for i, mmap_info in enumerate(self.infos): 783 if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len): 784 mmap_info.ticks += 1 785 self.infos[0], self.infos[i] = mmap_info, self.infos[0] 786 return True 787 return False 788 789 def _UniqueMmapName(self, mmap_info): 790 name = mmap_info.filename 791 index = 1 792 while name in self.names: 793 name = "%s-%d" % (mmap_info.filename, index) 794 index += 1 795 self.names.add(name) 796 return name 797 798 def _LoadKernelSymbols(self, code_map): 799 if not os.path.exists(KERNEL_ALLSYMS_FILE): 800 print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE 801 return False 802 kallsyms = open(KERNEL_ALLSYMS_FILE, "r") 803 code = None 804 for line in kallsyms: 805 match = KERNEL_ALLSYMS_LINE_RE.match(line) 806 if match: 807 start_address = int(match.group(1), 16) 808 end_address = start_address 809 name = match.group(2) 810 if code: 811 code.end_address = start_address 812 code_map.Add(code, 16) 813 code = Code(name, start_address, end_address, "kernel", 0) 814 return True 815 816 817def PrintReport(code_map, library_repo, arch, ticks, options): 818 print "Ticks per symbol:" 819 used_code = [code for code in code_map.UsedCode()] 820 used_code.sort(key=lambda x: x.self_ticks, reverse=True) 821 for i, code in enumerate(used_code): 822 code_ticks = code.self_ticks 823 print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks, 824 code.FullName(), code.origin) 825 if options.disasm_all or i < options.disasm_top: 826 code.PrintAnnotated(arch, options) 827 print 828 print "Ticks per library:" 829 mmap_infos = [m for m in library_repo.infos if m.ticks > 0] 830 mmap_infos.sort(key=lambda m: m.ticks, reverse=True) 831 for mmap_info in mmap_infos: 832 mmap_ticks = mmap_info.ticks 833 print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks, 834 mmap_info.unique_name) 835 836 837def PrintDot(code_map, options): 838 print "digraph G {" 839 for code in code_map.UsedCode(): 840 if code.self_ticks < 10: 841 continue 842 print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name) 843 if code.callee_ticks: 844 for callee, ticks in code.callee_ticks.iteritems(): 845 print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks) 846 print "}" 847 848 849if __name__ == "__main__": 850 parser = optparse.OptionParser(USAGE) 851 parser.add_option("--snapshot-log", 852 default="obj/release/snapshot.log", 853 help="V8 snapshot log file name [default: %default]") 854 parser.add_option("--log", 855 default="v8.log", 856 help="V8 log file name [default: %default]") 857 parser.add_option("--snapshot", 858 default=False, 859 action="store_true", 860 help="process V8 snapshot log [default: %default]") 861 parser.add_option("--trace", 862 default="perf.data", 863 help="perf trace file name [default: %default]") 864 parser.add_option("--kernel", 865 default=False, 866 action="store_true", 867 help="process kernel entries [default: %default]") 868 parser.add_option("--disasm-top", 869 default=0, 870 type="int", 871 help=("number of top symbols to disassemble and annotate " 872 "[default: %default]")) 873 parser.add_option("--disasm-all", 874 default=False, 875 action="store_true", 876 help=("disassemble and annotate all used symbols " 877 "[default: %default]")) 878 parser.add_option("--dot", 879 default=False, 880 action="store_true", 881 help="produce dot output (WIP) [default: %default]") 882 parser.add_option("--quiet", "-q", 883 default=False, 884 action="store_true", 885 help="no auxiliary messages [default: %default]") 886 parser.add_option("--gc-fake-mmap", 887 default="/tmp/__v8_gc__", 888 help="gc fake mmap file [default: %default]") 889 parser.add_option("--objdump", 890 default="/usr/bin/objdump", 891 help="objdump tool to use [default: %default]") 892 parser.add_option("--host-root", 893 default="", 894 help="Path to the host root [default: %default]") 895 options, args = parser.parse_args() 896 897 if not options.quiet: 898 if options.snapshot: 899 print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log, 900 options.log, 901 options.log) 902 else: 903 print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log) 904 print "Perf trace file: %s" % options.trace 905 906 V8_GC_FAKE_MMAP = options.gc_fake_mmap 907 HOST_ROOT = options.host_root 908 if os.path.exists(options.objdump): 909 disasm.OBJDUMP_BIN = options.objdump 910 OBJDUMP_BIN = options.objdump 911 else: 912 print "Cannot find %s, falling back to default objdump" % options.objdump 913 914 # Stats. 915 events = 0 916 ticks = 0 917 missed_ticks = 0 918 really_missed_ticks = 0 919 optimized_ticks = 0 920 generated_ticks = 0 921 v8_internal_ticks = 0 922 mmap_time = 0 923 sample_time = 0 924 925 # Process the snapshot log to fill the snapshot name map. 926 snapshot_name_map = {} 927 if options.snapshot: 928 snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log) 929 snapshot_name_map = snapshot_log_reader.ReadNameMap() 930 931 # Initialize the log reader. 932 code_map = CodeMap() 933 log_reader = LogReader(log_name=options.log + ".ll", 934 code_map=code_map, 935 snapshot_pos_to_name=snapshot_name_map) 936 if not options.quiet: 937 print "Generated code architecture: %s" % log_reader.arch 938 print 939 sys.stdout.flush() 940 941 # Process the code and trace logs. 942 library_repo = LibraryRepo() 943 log_reader.ReadUpToGC() 944 trace_reader = TraceReader(options.trace) 945 while True: 946 header, offset = trace_reader.ReadEventHeader() 947 if not header: 948 break 949 events += 1 950 if header.type == PERF_RECORD_MMAP: 951 start = time.time() 952 mmap_info = trace_reader.ReadMmap(header, offset) 953 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: 954 log_reader.ReadUpToGC() 955 else: 956 library_repo.Load(mmap_info, code_map, options) 957 mmap_time += time.time() - start 958 elif header.type == PERF_RECORD_SAMPLE: 959 ticks += 1 960 start = time.time() 961 sample = trace_reader.ReadSample(header, offset) 962 code = code_map.Find(sample.ip) 963 if code: 964 code.Tick(sample.ip) 965 if code.codetype == Code.OPTIMIZED: 966 optimized_ticks += 1 967 elif code.codetype == Code.FULL_CODEGEN: 968 generated_ticks += 1 969 elif code.codetype == Code.V8INTERNAL: 970 v8_internal_ticks += 1 971 else: 972 missed_ticks += 1 973 if not library_repo.Tick(sample.ip) and not code: 974 really_missed_ticks += 1 975 if trace_reader.callchain_supported: 976 for ip in sample.ips: 977 caller_code = code_map.Find(ip) 978 if caller_code: 979 if code: 980 caller_code.CalleeTick(code) 981 code = caller_code 982 sample_time += time.time() - start 983 984 if options.dot: 985 PrintDot(code_map, options) 986 else: 987 PrintReport(code_map, library_repo, log_reader.arch, ticks, options) 988 989 if not options.quiet: 990 def PrintTicks(number, total, description): 991 print("%10d %5.1f%% ticks in %s" % 992 (number, 100.0*number/total, description)) 993 print 994 print "Stats:" 995 print "%10d total trace events" % events 996 print "%10d total ticks" % ticks 997 print "%10d ticks not in symbols" % missed_ticks 998 unaccounted = "unaccounted ticks" 999 if really_missed_ticks > 0: 1000 unaccounted += " (probably in the kernel, try --kernel)" 1001 PrintTicks(really_missed_ticks, ticks, unaccounted) 1002 PrintTicks(optimized_ticks, ticks, "ticks in optimized code") 1003 PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code") 1004 PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*") 1005 print "%10d total symbols" % len([c for c in code_map.AllCode()]) 1006 print "%10d used symbols" % len([c for c in code_map.UsedCode()]) 1007 print "%9.2fs library processing time" % mmap_time 1008 print "%9.2fs tick processing time" % sample_time 1009 1010 log_reader.Dispose() 1011 trace_reader.Dispose() 1012