# Copyright 2015 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license # that can be found in the LICENSE file. import argparse import logging import mmap import os import signal import struct import sys import threading import time # some magic numbers: see http://goo.gl/ecAgke for Intel docs PCI_IMC_BAR_OFFSET = 0x48 IMC_DRAM_GT_REQUESTS = 0x5040 # GPU IMC_DRAM_IA_REQUESTS = 0x5044 # CPU IMC_DRAM_IO_REQUESTS = 0x5048 # PCIe, Display Engine, USB, etc. IMC_DRAM_DATA_READS = 0x5050 # read traffic IMC_DRAM_DATA_WRITES = 0x5054 # write traffic IMC_MMAP_SIZE = 0x6000 CACHE_LINE = 64.0 MEGABYTE = 1048576.0 RATE_FIELD_FORMAT = '%s: %5d MB/s' RAW_FIELD_FORMAT = '%s: %d' class IMCCounter: """Small struct-like class to keep track of the location and attributes for each counter. Parameters: name: short, unique identifying token for this counter type idx: offset into the IMC memory where we can find this counter total: True if we should count this in the number for total bandwidth """ def __init__(self, name, idx, total): self.name = name self.idx = idx self.total = total counters = [ # name idx total IMCCounter("GT", IMC_DRAM_GT_REQUESTS, False), IMCCounter("IA", IMC_DRAM_IA_REQUESTS, False), IMCCounter("IO", IMC_DRAM_IO_REQUESTS, False), IMCCounter("RD", IMC_DRAM_DATA_READS, True), IMCCounter("WR", IMC_DRAM_DATA_WRITES, True), ] class MappedFile: """Helper class to wrap mmap calls in a context manager so they are always cleaned up, and to help extract values from the bytes. Parameters: filename: name of file to mmap offset: offset from beginning of file to mmap from size: amount of the file to mmap """ def __init__(self, filename, offset, size): self._filename = filename self._offset = offset self._size = size def __enter__(self): self._f = open(self._filename, 'rb') try: self._mm = mmap.mmap(self._f.fileno(), self._size, mmap.MAP_SHARED, mmap.PROT_READ, offset=self._offset) except mmap.error: self._f.close() raise return self def __exit__(self, exc_type, exc_val, exc_tb): self._mm.close() self._f.close() def bytes_to_python(self, offset, fmt): """Grab a portion of an mmapped file and return the bytes as a python object. Parameters: offset: offset into the mmapped file to start at fmt: string containing the struct type to extract from the file Returns: a Struct containing the bytes starting at offset into the mmapped file, reified as python values """ s = struct.Struct(fmt) return s.unpack(self._mm[offset:offset+s.size]) def file_bytes_to_python(f, offset, fmt): """Grab a portion of a regular file and return the bytes as a python object. Parameters: f: file-like object to extract from offset: offset into the mmapped file to start at fmt: string containing the struct type to extract from the file Returns: a Struct containing the bytes starting at offset into f, reified as python values """ s = struct.Struct(fmt) f.seek(0) bs = f.read() if len(bs) >= offset + s.size: return s.unpack(bs[offset:offset+s.size]) else: raise IOError('Invalid seek in file') def uint32_diff(l, r): """Compute the difference of two 32-bit numbers as another 32-bit number. Since the counters are monotonically increasing, we always want the unsigned difference. """ return l - r if l >= r else l - r + 0x100000000 class MemoryBandwidthLogger(threading.Thread): """Class for gathering memory usage in MB/s on x86 systems. raw: dump raw counter values seconds_period: time period between reads If you are using non-raw mode and your seconds_period is too high, your results might be nonsense because the counters might have wrapped around. Parameters: raw: True if you want to dump raw counters. These will simply tell you the number of cache-line-size transactions that have occurred so far. seconds_period: Duration to wait before dumping counters again. Defaults to 2 seconds. """ def __init__(self, raw, seconds_period=2): super(MemoryBandwidthLogger, self).__init__() self._raw = raw self._seconds_period = seconds_period self._running = True def run(self): # get base address register and align to 4k try: bar_addr = self._get_pci_imc_bar() except IOError: logging.error('Cannot read base address register') return bar_addr = (bar_addr // 4096) * 4096 # set up the output formatting. raw counters don't have any # particular meaning in MB/s since they count how many cache # lines have been read from or written to up to that point, # and so don't represent a rate. # TOTAL is always given as a rate, though. rate_factor = CACHE_LINE / (self._seconds_period * MEGABYTE) if self._raw: field_format = RAW_FIELD_FORMAT else: field_format = RATE_FIELD_FORMAT # get /dev/mem and mmap it with MappedFile('/dev/mem', bar_addr, IMC_MMAP_SIZE) as mm: # take initial samples, then take samples every seconds_period last_values = self._take_samples(mm) while self._running: time.sleep(self._seconds_period) values = self._take_samples(mm) # we need to calculate the MB differences no matter what # because the "total" field uses it even when we are in # raw mode mb_diff = { c.name: uint32_diff(values[c.name], last_values[c.name]) * rate_factor for c in counters } output_dict = values if self._raw else mb_diff output = list((c.name, output_dict[c.name]) for c in counters) total_rate = sum(mb_diff[c.name] for c in counters if c.total) output_str = \ ' '.join(field_format % (k, v) for k, v in output) + \ ' ' + (RATE_FIELD_FORMAT % ('TOTAL', total_rate)) logging.debug(output_str) last_values = values def stop(self): self._running = False def _get_pci_imc_bar(self): """Get the base address register for the IMC (integrated memory controller). This is later used to extract counter values. Returns: physical address for the IMC. """ with open('/proc/bus/pci/00/00.0', 'rb') as pci: return file_bytes_to_python(pci, PCI_IMC_BAR_OFFSET, '=Q')[0] def _take_samples(self, mm): """Get samples for each type of memory transaction. Parameters: mm: MappedFile representing physical memory Returns: dictionary mapping counter type to counter value """ return { c.name: mm.bytes_to_python(c.idx, '=I')[0] for c in counters }