• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright 2012 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30import bisect
31import collections
32import ctypes
33import disasm
34import mmap
35import optparse
36import os
37import re
38import subprocess
39import sys
40import time
41
42
43USAGE="""usage: %prog [OPTION]...
44
45Analyses V8 and perf logs to produce profiles.
46
47Perf logs can be collected using a command like:
48  $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
49  # -R: collect all data
50  # -e cycles: use cpu-cycles event (run "perf list" for details)
51  # -c 10000: write a sample after each 10000 events
52  # -f: force output file overwrite
53  # -i: limit profiling to our process and the kernel
54  # --ll-prof shell flag enables the right V8 logs
55This will produce a binary trace file (perf.data) that %prog can analyse.
56
57IMPORTANT:
58  The kernel has an internal maximum for events per second, it is 100K by
59  default. That's not enough for "-c 10000". Set it to some higher value:
60  $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
61  You can also make the warning about kernel address maps go away:
62  $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
63
64We have a convenience script that handles all of the above for you:
65  $ tools/run-llprof.sh ./d8 bench.js
66
67Examples:
68  # Print flat profile with annotated disassembly for the 10 top
69  # symbols. Use default log names and include the snapshot log.
70  $ %prog --snapshot --disasm-top=10
71
72  # Print flat profile with annotated disassembly for all used symbols.
73  # Use default log names and include kernel symbols into analysis.
74  $ %prog --disasm-all --kernel
75
76  # Print flat profile. Use custom log names.
77  $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
78"""
79
80
81JS_ORIGIN = "js"
82JS_SNAPSHOT_ORIGIN = "js-snapshot"
83
84class Code(object):
85  """Code object."""
86
87  _id = 0
88  UNKNOWN = 0
89  V8INTERNAL = 1
90  FULL_CODEGEN = 2
91  OPTIMIZED = 3
92
93  def __init__(self, name, start_address, end_address, origin, origin_offset):
94    self.id = Code._id
95    Code._id += 1
96    self.name = name
97    self.other_names = None
98    self.start_address = start_address
99    self.end_address = end_address
100    self.origin = origin
101    self.origin_offset = origin_offset
102    self.self_ticks = 0
103    self.self_ticks_map = None
104    self.callee_ticks = None
105    if name.startswith("LazyCompile:*"):
106      self.codetype = Code.OPTIMIZED
107    elif name.startswith("LazyCompile:"):
108      self.codetype = Code.FULL_CODEGEN
109    elif name.startswith("v8::internal::"):
110      self.codetype = Code.V8INTERNAL
111    else:
112      self.codetype = Code.UNKNOWN
113
114  def AddName(self, name):
115    assert self.name != name
116    if self.other_names is None:
117      self.other_names = [name]
118      return
119    if not name in self.other_names:
120      self.other_names.append(name)
121
122  def FullName(self):
123    if self.other_names is None:
124      return self.name
125    self.other_names.sort()
126    return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
127
128  def IsUsed(self):
129    return self.self_ticks > 0 or self.callee_ticks is not None
130
131  def Tick(self, pc):
132    self.self_ticks += 1
133    if self.self_ticks_map is None:
134      self.self_ticks_map = collections.defaultdict(lambda: 0)
135    offset = pc - self.start_address
136    self.self_ticks_map[offset] += 1
137
138  def CalleeTick(self, callee):
139    if self.callee_ticks is None:
140      self.callee_ticks = collections.defaultdict(lambda: 0)
141    self.callee_ticks[callee] += 1
142
143  def PrintAnnotated(self, arch, options):
144    if self.self_ticks_map is None:
145      ticks_map = []
146    else:
147      ticks_map = self.self_ticks_map.items()
148    # Convert the ticks map to offsets and counts arrays so that later
149    # we can do binary search in the offsets array.
150    ticks_map.sort(key=lambda t: t[0])
151    ticks_offsets = [t[0] for t in ticks_map]
152    ticks_counts = [t[1] for t in ticks_map]
153    # Get a list of disassembled lines and their addresses.
154    lines = self._GetDisasmLines(arch, options)
155    if len(lines) == 0:
156      return
157    # Print annotated lines.
158    address = lines[0][0]
159    total_count = 0
160    for i in xrange(len(lines)):
161      start_offset = lines[i][0] - address
162      if i == len(lines) - 1:
163        end_offset = self.end_address - self.start_address
164      else:
165        end_offset = lines[i + 1][0] - address
166      # Ticks (reported pc values) are not always precise, i.e. not
167      # necessarily point at instruction starts. So we have to search
168      # for ticks that touch the current instruction line.
169      j = bisect.bisect_left(ticks_offsets, end_offset)
170      count = 0
171      for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
172        if offset < start_offset:
173          break
174        count += cnt
175      total_count += count
176      count = 100.0 * count / self.self_ticks
177      if count >= 0.01:
178        print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
179      else:
180        print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
181    print
182    assert total_count == self.self_ticks, \
183        "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
184
185  def __str__(self):
186    return "%s [0x%x, 0x%x) size: %d origin: %s" % (
187      self.name,
188      self.start_address,
189      self.end_address,
190      self.end_address - self.start_address,
191      self.origin)
192
193  def _GetDisasmLines(self, arch, options):
194    if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
195      inplace = False
196      filename = options.log + ".ll"
197    else:
198      inplace = True
199      filename = self.origin
200    return disasm.GetDisasmLines(filename,
201                                 self.origin_offset,
202                                 self.end_address - self.start_address,
203                                 arch,
204                                 inplace)
205
206
207class CodePage(object):
208  """Group of adjacent code objects."""
209
210  SHIFT = 20  # 1M pages
211  SIZE = (1 << SHIFT)
212  MASK = ~(SIZE - 1)
213
214  @staticmethod
215  def PageAddress(address):
216    return address & CodePage.MASK
217
218  @staticmethod
219  def PageId(address):
220    return address >> CodePage.SHIFT
221
222  @staticmethod
223  def PageAddressFromId(id):
224    return id << CodePage.SHIFT
225
226  def __init__(self, address):
227    self.address = address
228    self.code_objects = []
229
230  def Add(self, code):
231    self.code_objects.append(code)
232
233  def Remove(self, code):
234    self.code_objects.remove(code)
235
236  def Find(self, pc):
237    code_objects = self.code_objects
238    for i, code in enumerate(code_objects):
239      if code.start_address <= pc < code.end_address:
240        code_objects[0], code_objects[i] = code, code_objects[0]
241        return code
242    return None
243
244  def __iter__(self):
245    return self.code_objects.__iter__()
246
247
248class CodeMap(object):
249  """Code object map."""
250
251  def __init__(self):
252    self.pages = {}
253    self.min_address = 1 << 64
254    self.max_address = -1
255
256  def Add(self, code, max_pages=-1):
257    page_id = CodePage.PageId(code.start_address)
258    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
259    pages = 0
260    while page_id < limit_id:
261      if max_pages >= 0 and pages > max_pages:
262        print >>sys.stderr, \
263            "Warning: page limit (%d) reached for %s [%s]" % (
264            max_pages, code.name, code.origin)
265        break
266      if page_id in self.pages:
267        page = self.pages[page_id]
268      else:
269        page = CodePage(CodePage.PageAddressFromId(page_id))
270        self.pages[page_id] = page
271      page.Add(code)
272      page_id += 1
273      pages += 1
274    self.min_address = min(self.min_address, code.start_address)
275    self.max_address = max(self.max_address, code.end_address)
276
277  def Remove(self, code):
278    page_id = CodePage.PageId(code.start_address)
279    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
280    removed = False
281    while page_id < limit_id:
282      if page_id not in self.pages:
283        page_id += 1
284        continue
285      page = self.pages[page_id]
286      page.Remove(code)
287      removed = True
288      page_id += 1
289    return removed
290
291  def AllCode(self):
292    for page in self.pages.itervalues():
293      for code in page:
294        if CodePage.PageAddress(code.start_address) == page.address:
295          yield code
296
297  def UsedCode(self):
298    for code in self.AllCode():
299      if code.IsUsed():
300        yield code
301
302  def Print(self):
303    for code in self.AllCode():
304      print code
305
306  def Find(self, pc):
307    if pc < self.min_address or pc >= self.max_address:
308      return None
309    page_id = CodePage.PageId(pc)
310    if page_id not in self.pages:
311      return None
312    return self.pages[page_id].Find(pc)
313
314
315class CodeInfo(object):
316  """Generic info about generated code objects."""
317
318  def __init__(self, arch, header_size):
319    self.arch = arch
320    self.header_size = header_size
321
322
323class SnapshotLogReader(object):
324  """V8 snapshot log reader."""
325
326  _SNAPSHOT_CODE_NAME_RE = re.compile(
327    r"snapshot-code-name,(\d+),\"(.*)\"")
328
329  def __init__(self, log_name):
330    self.log_name = log_name
331
332  def ReadNameMap(self):
333    log = open(self.log_name, "r")
334    try:
335      snapshot_pos_to_name = {}
336      for line in log:
337        match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
338        if match:
339          pos = int(match.group(1))
340          name = match.group(2)
341          snapshot_pos_to_name[pos] = name
342    finally:
343      log.close()
344    return snapshot_pos_to_name
345
346
347class LogReader(object):
348  """V8 low-level (binary) log reader."""
349
350  _ARCH_TO_POINTER_TYPE_MAP = {
351    "ia32": ctypes.c_uint32,
352    "arm": ctypes.c_uint32,
353    "mips": ctypes.c_uint32,
354    "x64": ctypes.c_uint64
355  }
356
357  _CODE_CREATE_TAG = "C"
358  _CODE_MOVE_TAG = "M"
359  _CODE_DELETE_TAG = "D"
360  _SNAPSHOT_POSITION_TAG = "P"
361  _CODE_MOVING_GC_TAG = "G"
362
363  def __init__(self, log_name, code_map, snapshot_pos_to_name):
364    self.log_file = open(log_name, "r")
365    self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
366    self.log_pos = 0
367    self.code_map = code_map
368    self.snapshot_pos_to_name = snapshot_pos_to_name
369    self.address_to_snapshot_name = {}
370
371    self.arch = self.log[:self.log.find("\0")]
372    self.log_pos += len(self.arch) + 1
373    assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
374        "Unsupported architecture %s" % self.arch
375    pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
376
377    self.code_create_struct = LogReader._DefineStruct([
378        ("name_size", ctypes.c_int32),
379        ("code_address", pointer_type),
380        ("code_size", ctypes.c_int32)])
381
382    self.code_move_struct = LogReader._DefineStruct([
383        ("from_address", pointer_type),
384        ("to_address", pointer_type)])
385
386    self.code_delete_struct = LogReader._DefineStruct([
387        ("address", pointer_type)])
388
389    self.snapshot_position_struct = LogReader._DefineStruct([
390        ("address", pointer_type),
391        ("position", ctypes.c_int32)])
392
393  def ReadUpToGC(self):
394    while self.log_pos < self.log.size():
395      tag = self.log[self.log_pos]
396      self.log_pos += 1
397
398      if tag == LogReader._CODE_MOVING_GC_TAG:
399        self.address_to_snapshot_name.clear()
400        return
401
402      if tag == LogReader._CODE_CREATE_TAG:
403        event = self.code_create_struct.from_buffer(self.log, self.log_pos)
404        self.log_pos += ctypes.sizeof(event)
405        start_address = event.code_address
406        end_address = start_address + event.code_size
407        if start_address in self.address_to_snapshot_name:
408          name = self.address_to_snapshot_name[start_address]
409          origin = JS_SNAPSHOT_ORIGIN
410        else:
411          name = self.log[self.log_pos:self.log_pos + event.name_size]
412          origin = JS_ORIGIN
413        self.log_pos += event.name_size
414        origin_offset = self.log_pos
415        self.log_pos += event.code_size
416        code = Code(name, start_address, end_address, origin, origin_offset)
417        conficting_code = self.code_map.Find(start_address)
418        if conficting_code:
419          if not (conficting_code.start_address == code.start_address and
420            conficting_code.end_address == code.end_address):
421            self.code_map.Remove(conficting_code)
422          else:
423            LogReader._HandleCodeConflict(conficting_code, code)
424            # TODO(vitalyr): this warning is too noisy because of our
425            # attempts to reconstruct code log from the snapshot.
426            # print >>sys.stderr, \
427            #     "Warning: Skipping duplicate code log entry %s" % code
428            continue
429        self.code_map.Add(code)
430        continue
431
432      if tag == LogReader._CODE_MOVE_TAG:
433        event = self.code_move_struct.from_buffer(self.log, self.log_pos)
434        self.log_pos += ctypes.sizeof(event)
435        old_start_address = event.from_address
436        new_start_address = event.to_address
437        if old_start_address == new_start_address:
438          # Skip useless code move entries.
439          continue
440        code = self.code_map.Find(old_start_address)
441        if not code:
442          print >>sys.stderr, "Warning: Not found %x" % old_start_address
443          continue
444        assert code.start_address == old_start_address, \
445            "Inexact move address %x for %s" % (old_start_address, code)
446        self.code_map.Remove(code)
447        size = code.end_address - code.start_address
448        code.start_address = new_start_address
449        code.end_address = new_start_address + size
450        self.code_map.Add(code)
451        continue
452
453      if tag == LogReader._CODE_DELETE_TAG:
454        event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
455        self.log_pos += ctypes.sizeof(event)
456        old_start_address = event.address
457        code = self.code_map.Find(old_start_address)
458        if not code:
459          print >>sys.stderr, "Warning: Not found %x" % old_start_address
460          continue
461        assert code.start_address == old_start_address, \
462            "Inexact delete address %x for %s" % (old_start_address, code)
463        self.code_map.Remove(code)
464        continue
465
466      if tag == LogReader._SNAPSHOT_POSITION_TAG:
467        event = self.snapshot_position_struct.from_buffer(self.log,
468                                                          self.log_pos)
469        self.log_pos += ctypes.sizeof(event)
470        start_address = event.address
471        snapshot_pos = event.position
472        if snapshot_pos in self.snapshot_pos_to_name:
473          self.address_to_snapshot_name[start_address] = \
474              self.snapshot_pos_to_name[snapshot_pos]
475        continue
476
477      assert False, "Unknown tag %s" % tag
478
479  def Dispose(self):
480    self.log.close()
481    self.log_file.close()
482
483  @staticmethod
484  def _DefineStruct(fields):
485    class Struct(ctypes.Structure):
486      _fields_ = fields
487    return Struct
488
489  @staticmethod
490  def _HandleCodeConflict(old_code, new_code):
491    assert (old_code.start_address == new_code.start_address and
492            old_code.end_address == new_code.end_address), \
493        "Conficting code log entries %s and %s" % (old_code, new_code)
494    if old_code.name == new_code.name:
495      return
496    # Code object may be shared by a few functions. Collect the full
497    # set of names.
498    old_code.AddName(new_code.name)
499
500
501class Descriptor(object):
502  """Descriptor of a structure in the binary trace log."""
503
504  CTYPE_MAP = {
505    "u16": ctypes.c_uint16,
506    "u32": ctypes.c_uint32,
507    "u64": ctypes.c_uint64
508  }
509
510  def __init__(self, fields):
511    class TraceItem(ctypes.Structure):
512      _fields_ = Descriptor.CtypesFields(fields)
513
514      def __str__(self):
515        return ", ".join("%s: %s" % (field, self.__getattribute__(field))
516                         for field, _ in TraceItem._fields_)
517
518    self.ctype = TraceItem
519
520  def Read(self, trace, offset):
521    return self.ctype.from_buffer(trace, offset)
522
523  @staticmethod
524  def CtypesFields(fields):
525    return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
526
527
528# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
529# for the gory details.
530
531
532# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
533TRACE_HEADER_DESC = Descriptor([
534  ("magic", "u64"),
535  ("size", "u64"),
536  ("attr_size", "u64"),
537  ("attrs_offset", "u64"),
538  ("attrs_size", "u64"),
539  ("data_offset", "u64"),
540  ("data_size", "u64"),
541  ("event_types_offset", "u64"),
542  ("event_types_size", "u64")
543])
544
545
546# Reference: /usr/include/linux/perf_event.h
547PERF_EVENT_ATTR_DESC = Descriptor([
548  ("type", "u32"),
549  ("size", "u32"),
550  ("config", "u64"),
551  ("sample_period_or_freq", "u64"),
552  ("sample_type", "u64"),
553  ("read_format", "u64"),
554  ("flags", "u64"),
555  ("wakeup_events_or_watermark", "u32"),
556  ("bp_type", "u32"),
557  ("bp_addr", "u64"),
558  ("bp_len", "u64")
559])
560
561
562# Reference: /usr/include/linux/perf_event.h
563PERF_EVENT_HEADER_DESC = Descriptor([
564  ("type", "u32"),
565  ("misc", "u16"),
566  ("size", "u16")
567])
568
569
570# Reference: kernel/events/core.c
571PERF_MMAP_EVENT_BODY_DESC = Descriptor([
572  ("pid", "u32"),
573  ("tid", "u32"),
574  ("addr", "u64"),
575  ("len", "u64"),
576  ("pgoff", "u64")
577])
578
579
580# perf_event_attr.sample_type bits control the set of
581# perf_sample_event fields.
582PERF_SAMPLE_IP = 1 << 0
583PERF_SAMPLE_TID = 1 << 1
584PERF_SAMPLE_TIME = 1 << 2
585PERF_SAMPLE_ADDR = 1 << 3
586PERF_SAMPLE_READ = 1 << 4
587PERF_SAMPLE_CALLCHAIN = 1 << 5
588PERF_SAMPLE_ID = 1 << 6
589PERF_SAMPLE_CPU = 1 << 7
590PERF_SAMPLE_PERIOD = 1 << 8
591PERF_SAMPLE_STREAM_ID = 1 << 9
592PERF_SAMPLE_RAW = 1 << 10
593
594
595# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
596PERF_SAMPLE_EVENT_BODY_FIELDS = [
597  ("ip", "u64", PERF_SAMPLE_IP),
598  ("pid", "u32", PERF_SAMPLE_TID),
599  ("tid", "u32", PERF_SAMPLE_TID),
600  ("time", "u64", PERF_SAMPLE_TIME),
601  ("addr", "u64", PERF_SAMPLE_ADDR),
602  ("id", "u64", PERF_SAMPLE_ID),
603  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
604  ("cpu", "u32", PERF_SAMPLE_CPU),
605  ("res", "u32", PERF_SAMPLE_CPU),
606  ("period", "u64", PERF_SAMPLE_PERIOD),
607  # Don't want to handle read format that comes after the period and
608  # before the callchain and has variable size.
609  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
610  # Raw data follows the callchain and is ignored.
611]
612
613
614PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
615
616
617PERF_RECORD_MMAP = 1
618PERF_RECORD_SAMPLE = 9
619
620
621class TraceReader(object):
622  """Perf (linux-2.6/tools/perf) trace file reader."""
623
624  _TRACE_HEADER_MAGIC = 4993446653023372624
625
626  def __init__(self, trace_name):
627    self.trace_file = open(trace_name, "r")
628    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
629    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
630    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
631      print >>sys.stderr, "Warning: unsupported trace header magic"
632    self.offset = self.trace_header.data_offset
633    self.limit = self.trace_header.data_offset + self.trace_header.data_size
634    assert self.limit <= self.trace.size(), \
635        "Trace data limit exceeds trace file size"
636    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
637    assert self.trace_header.attrs_size != 0, \
638        "No perf event attributes found in the trace"
639    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
640                                                self.trace_header.attrs_offset)
641    self.sample_event_body_desc = self._SampleEventBodyDesc(
642        perf_event_attr.sample_type)
643    self.callchain_supported = \
644        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
645    if self.callchain_supported:
646      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
647      self.ip_size = ctypes.sizeof(self.ip_struct)
648
649  def ReadEventHeader(self):
650    if self.offset >= self.limit:
651      return None, 0
652    offset = self.offset
653    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
654    self.offset += header.size
655    return header, offset
656
657  def ReadMmap(self, header, offset):
658    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
659                                               offset + self.header_size)
660    # Read null-terminated filename.
661    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
662                          offset + header.size]
663    mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
664    return mmap_info
665
666  def ReadSample(self, header, offset):
667    sample = self.sample_event_body_desc.Read(self.trace,
668                                              offset + self.header_size)
669    if not self.callchain_supported:
670      return sample
671    sample.ips = []
672    offset += self.header_size + ctypes.sizeof(sample)
673    for _ in xrange(sample.nr):
674      sample.ips.append(
675        self.ip_struct.from_buffer(self.trace, offset).value)
676      offset += self.ip_size
677    return sample
678
679  def Dispose(self):
680    self.trace.close()
681    self.trace_file.close()
682
683  def _SampleEventBodyDesc(self, sample_type):
684    assert (sample_type & PERF_SAMPLE_READ) == 0, \
685           "Can't hande read format in samples"
686    fields = [(field, format)
687              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
688              if (bit & sample_type) != 0]
689    return Descriptor(fields)
690
691
692OBJDUMP_SECTION_HEADER_RE = re.compile(
693  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
694OBJDUMP_SYMBOL_LINE_RE = re.compile(
695  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
696OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
697  r"^DYNAMIC SYMBOL TABLE")
698OBJDUMP_SKIP_RE = re.compile(
699  r"^.*ld\.so\.cache$")
700KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
701PERF_KERNEL_ALLSYMS_RE = re.compile(
702  r".*kallsyms.*")
703KERNEL_ALLSYMS_LINE_RE = re.compile(
704  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
705
706
707class LibraryRepo(object):
708  def __init__(self):
709    self.infos = []
710    self.names = set()
711    self.ticks = {}
712
713  def Load(self, mmap_info, code_map, options):
714    # Skip kernel mmaps when requested using the fact that their tid
715    # is 0.
716    if mmap_info.tid == 0 and not options.kernel:
717      return True
718    if OBJDUMP_SKIP_RE.match(mmap_info.filename):
719      return True
720    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
721      return self._LoadKernelSymbols(code_map)
722    self.infos.append(mmap_info)
723    mmap_info.ticks = 0
724    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
725    if not os.path.exists(mmap_info.filename):
726      return True
727    # Request section headers (-h), symbols (-t), and dynamic symbols
728    # (-T) from objdump.
729    # Unfortunately, section headers span two lines, so we have to
730    # keep the just seen section name (from the first line in each
731    # section header) in the after_section variable.
732    if mmap_info.filename.endswith(".ko"):
733      dynamic_symbols = ""
734    else:
735      dynamic_symbols = "-T"
736    process = subprocess.Popen(
737      "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
738      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
739    pipe = process.stdout
740    after_section = None
741    code_sections = set()
742    reloc_sections = set()
743    dynamic = False
744    try:
745      for line in pipe:
746        if after_section:
747          if line.find("CODE") != -1:
748            code_sections.add(after_section)
749          if line.find("RELOC") != -1:
750            reloc_sections.add(after_section)
751          after_section = None
752          continue
753
754        match = OBJDUMP_SECTION_HEADER_RE.match(line)
755        if match:
756          after_section = match.group(1)
757          continue
758
759        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
760          dynamic = True
761          continue
762
763        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
764        if match:
765          start_address = int(match.group(1), 16)
766          origin_offset = start_address
767          flags = match.group(2)
768          section = match.group(3)
769          if section in code_sections:
770            if dynamic or section in reloc_sections:
771              start_address += mmap_info.addr
772            size = int(match.group(4), 16)
773            name = match.group(5)
774            origin = mmap_info.filename
775            code_map.Add(Code(name, start_address, start_address + size,
776                              origin, origin_offset))
777    finally:
778      pipe.close()
779    assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
780
781  def Tick(self, pc):
782    for i, mmap_info in enumerate(self.infos):
783      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
784        mmap_info.ticks += 1
785        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
786        return True
787    return False
788
789  def _UniqueMmapName(self, mmap_info):
790    name = mmap_info.filename
791    index = 1
792    while name in self.names:
793      name = "%s-%d" % (mmap_info.filename, index)
794      index += 1
795    self.names.add(name)
796    return name
797
798  def _LoadKernelSymbols(self, code_map):
799    if not os.path.exists(KERNEL_ALLSYMS_FILE):
800      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
801      return False
802    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
803    code = None
804    for line in kallsyms:
805      match = KERNEL_ALLSYMS_LINE_RE.match(line)
806      if match:
807        start_address = int(match.group(1), 16)
808        end_address = start_address
809        name = match.group(2)
810        if code:
811          code.end_address = start_address
812          code_map.Add(code, 16)
813        code = Code(name, start_address, end_address, "kernel", 0)
814    return True
815
816
817def PrintReport(code_map, library_repo, arch, ticks, options):
818  print "Ticks per symbol:"
819  used_code = [code for code in code_map.UsedCode()]
820  used_code.sort(key=lambda x: x.self_ticks, reverse=True)
821  for i, code in enumerate(used_code):
822    code_ticks = code.self_ticks
823    print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
824                                    code.FullName(), code.origin)
825    if options.disasm_all or i < options.disasm_top:
826      code.PrintAnnotated(arch, options)
827  print
828  print "Ticks per library:"
829  mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
830  mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
831  for mmap_info in mmap_infos:
832    mmap_ticks = mmap_info.ticks
833    print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
834                               mmap_info.unique_name)
835
836
837def PrintDot(code_map, options):
838  print "digraph G {"
839  for code in code_map.UsedCode():
840    if code.self_ticks < 10:
841      continue
842    print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
843    if code.callee_ticks:
844      for callee, ticks in code.callee_ticks.iteritems():
845        print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
846  print "}"
847
848
849if __name__ == "__main__":
850  parser = optparse.OptionParser(USAGE)
851  parser.add_option("--snapshot-log",
852                    default="obj/release/snapshot.log",
853                    help="V8 snapshot log file name [default: %default]")
854  parser.add_option("--log",
855                    default="v8.log",
856                    help="V8 log file name [default: %default]")
857  parser.add_option("--snapshot",
858                    default=False,
859                    action="store_true",
860                    help="process V8 snapshot log [default: %default]")
861  parser.add_option("--trace",
862                    default="perf.data",
863                    help="perf trace file name [default: %default]")
864  parser.add_option("--kernel",
865                    default=False,
866                    action="store_true",
867                    help="process kernel entries [default: %default]")
868  parser.add_option("--disasm-top",
869                    default=0,
870                    type="int",
871                    help=("number of top symbols to disassemble and annotate "
872                          "[default: %default]"))
873  parser.add_option("--disasm-all",
874                    default=False,
875                    action="store_true",
876                    help=("disassemble and annotate all used symbols "
877                          "[default: %default]"))
878  parser.add_option("--dot",
879                    default=False,
880                    action="store_true",
881                    help="produce dot output (WIP) [default: %default]")
882  parser.add_option("--quiet", "-q",
883                    default=False,
884                    action="store_true",
885                    help="no auxiliary messages [default: %default]")
886  parser.add_option("--gc-fake-mmap",
887                    default="/tmp/__v8_gc__",
888                    help="gc fake mmap file [default: %default]")
889  parser.add_option("--objdump",
890                    default="/usr/bin/objdump",
891                    help="objdump tool to use [default: %default]")
892  parser.add_option("--host-root",
893                    default="",
894                    help="Path to the host root [default: %default]")
895  options, args = parser.parse_args()
896
897  if not options.quiet:
898    if options.snapshot:
899      print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
900                                        options.log,
901                                        options.log)
902    else:
903      print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
904    print "Perf trace file: %s" % options.trace
905
906  V8_GC_FAKE_MMAP = options.gc_fake_mmap
907  HOST_ROOT = options.host_root
908  if os.path.exists(options.objdump):
909    disasm.OBJDUMP_BIN = options.objdump
910    OBJDUMP_BIN = options.objdump
911  else:
912    print "Cannot find %s, falling back to default objdump" % options.objdump
913
914  # Stats.
915  events = 0
916  ticks = 0
917  missed_ticks = 0
918  really_missed_ticks = 0
919  optimized_ticks = 0
920  generated_ticks = 0
921  v8_internal_ticks = 0
922  mmap_time = 0
923  sample_time = 0
924
925  # Process the snapshot log to fill the snapshot name map.
926  snapshot_name_map = {}
927  if options.snapshot:
928    snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
929    snapshot_name_map = snapshot_log_reader.ReadNameMap()
930
931  # Initialize the log reader.
932  code_map = CodeMap()
933  log_reader = LogReader(log_name=options.log + ".ll",
934                         code_map=code_map,
935                         snapshot_pos_to_name=snapshot_name_map)
936  if not options.quiet:
937    print "Generated code architecture: %s" % log_reader.arch
938    print
939    sys.stdout.flush()
940
941  # Process the code and trace logs.
942  library_repo = LibraryRepo()
943  log_reader.ReadUpToGC()
944  trace_reader = TraceReader(options.trace)
945  while True:
946    header, offset = trace_reader.ReadEventHeader()
947    if not header:
948      break
949    events += 1
950    if header.type == PERF_RECORD_MMAP:
951      start = time.time()
952      mmap_info = trace_reader.ReadMmap(header, offset)
953      if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
954        log_reader.ReadUpToGC()
955      else:
956        library_repo.Load(mmap_info, code_map, options)
957      mmap_time += time.time() - start
958    elif header.type == PERF_RECORD_SAMPLE:
959      ticks += 1
960      start = time.time()
961      sample = trace_reader.ReadSample(header, offset)
962      code = code_map.Find(sample.ip)
963      if code:
964        code.Tick(sample.ip)
965        if code.codetype == Code.OPTIMIZED:
966          optimized_ticks += 1
967        elif code.codetype == Code.FULL_CODEGEN:
968          generated_ticks += 1
969        elif code.codetype == Code.V8INTERNAL:
970          v8_internal_ticks += 1
971      else:
972        missed_ticks += 1
973      if not library_repo.Tick(sample.ip) and not code:
974        really_missed_ticks += 1
975      if trace_reader.callchain_supported:
976        for ip in sample.ips:
977          caller_code = code_map.Find(ip)
978          if caller_code:
979            if code:
980              caller_code.CalleeTick(code)
981            code = caller_code
982      sample_time += time.time() - start
983
984  if options.dot:
985    PrintDot(code_map, options)
986  else:
987    PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
988
989    if not options.quiet:
990      def PrintTicks(number, total, description):
991        print("%10d %5.1f%% ticks in %s" %
992              (number, 100.0*number/total, description))
993      print
994      print "Stats:"
995      print "%10d total trace events" % events
996      print "%10d total ticks" % ticks
997      print "%10d ticks not in symbols" % missed_ticks
998      unaccounted = "unaccounted ticks"
999      if really_missed_ticks > 0:
1000        unaccounted += " (probably in the kernel, try --kernel)"
1001      PrintTicks(really_missed_ticks, ticks, unaccounted)
1002      PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
1003      PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
1004      PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
1005      print "%10d total symbols" % len([c for c in code_map.AllCode()])
1006      print "%10d used symbols" % len([c for c in code_map.UsedCode()])
1007      print "%9.2fs library processing time" % mmap_time
1008      print "%9.2fs tick processing time" % sample_time
1009
1010  log_reader.Dispose()
1011  trace_reader.Dispose()
1012