#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Processes the raw output from containers_memory_usage into CSV files. Each CSV
# file contains the results for all tested container types for a given key and
# value type.
#
# Usage:
# $ out/release/containers_memory_benchmark &> output.txt
# $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results

import argparse
from collections.abc import Iterable, Sequence
import csv
import os.path
import re
import sys
from typing import Optional

# Line formats recognized in the raw benchmark output. The named groups are
# read back by name in _process_lines().
_HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
_ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
_ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
_FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')


class ContainerStatsProcessor:
  """Accumulates the number of live allocated bytes per iteration for one
  container type."""

  def __init__(self, name: str):
    # e.g. base::flat_map
    self._name = name
    # current number of elements in the container; None until the first
    # `iteration` line has been seen.
    self._n: Optional[int] = None
    # map of address to size for currently active allocations. Needed because
    # the free handler only records an address, and not a size.
    self._addr_to_size: dict[str, int] = {}
    # running count of the number of bytes needed at the current iteration
    self._running_size = 0
    # map of container size to number of bytes used to store a container of
    # that size. Keys are expected to be contiguous from 0 to the total
    # iteration count.
    self._data: dict[int, int] = {}

  @property
  def name(self) -> str:
    return self._name

  @property
  def data(self) -> dict[int, int]:
    return self._data

  def did_alloc(self, addr: str, size: int) -> None:
    """Records a new live allocation of `size` bytes at `addr`."""
    self._addr_to_size[addr] = size
    self._running_size += size

  def did_free(self, addr: str) -> None:
    """Records that the allocation at `addr` was released.

    Raises:
      KeyError: if `addr` is not a currently tracked allocation.
    """
    size = self._addr_to_size.pop(addr)
    self._running_size -= size

  def did_iterate(self, n: int) -> None:
    """Closes out the previous iteration (if any) and starts iteration `n`."""
    self.flush_current_iteration_if_needed()
    self._n = n

  def flush_current_iteration_if_needed(self) -> None:
    """Stores the running byte count under the current iteration number.

    Does nothing when no iteration has started yet, so a container without any
    `iteration` line does not end up with a bogus `None` key.
    """
    if self._n is not None:
      self._data[self._n] = self._running_size


class TestCaseProcessor:
  """Collects per-container stats for one test case and writes them to a CSV."""

  def __init__(self, name: str):
    # e.g. int -> std::string
    self._name = name
    # containers for which all allocation data has been processed and
    # finalized.
    self._finalized_stats: list[ContainerStatsProcessor] = []
    # the current container being processed.
    self._current_container_stats: Optional[ContainerStatsProcessor] = None

  @property
  def current_container_stats(self) -> Optional[ContainerStatsProcessor]:
    return self._current_container_stats

  def did_begin_container_stats(self, container_type: str) -> None:
    """Finalizes the container in progress and starts `container_type`."""
    self._finalize_current_container_stats_if_needed()
    self._current_container_stats = ContainerStatsProcessor(container_type)

  def did_finish_container_stats(self, output_dir: str) -> None:
    """Writes `<output_dir>/<test case name>.csv` for all finalized containers.

    The CSV has one `size` column followed by one column per container type,
    and one row per recorded container size.

    Raises:
      ValueError: if the containers did not record the same set of sizes.
    """
    self._finalize_current_container_stats_if_needed()

    # Validate before touching the filesystem so that inconsistent input does
    # not leave a half-written CSV behind.
    keys = self._common_keys()

    if output_dir:
      os.makedirs(output_dir, exist_ok=True)
    with open(
        os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
    ) as f:
      writer = csv.writer(f)
      # First the column headers...
      writer.writerow(
          ['size'] + [stats.name for stats in self._finalized_stats]
      )
      # ...then one row per container size.
      for key in keys:
        writer.writerow(
            [key] + [stats.data[key] for stats in self._finalized_stats]
        )

  def _common_keys(self) -> list[int]:
    """Returns the sorted container sizes shared by every finalized container.

    In theory, all processed containers should have the same set of keys, but
    check just to be sure. This raises rather than asserts so that the check
    still runs under `python -O`.
    """
    if not self._finalized_stats:
      return []
    first = self._finalized_stats[0]
    keys = sorted(first.data)
    for stats in self._finalized_stats[1:]:
      if sorted(stats.data) != keys:
        raise ValueError(
            f'{self._name}: {stats.name} recorded different container sizes '
            f'than {first.name}'
        )
    return keys

  def _finalize_current_container_stats_if_needed(self) -> None:
    """Flushes the container in progress, if any, into the finalized list."""
    current = self._current_container_stats
    if current is None:
      return
    current.flush_current_iteration_if_needed()
    self._finalized_stats.append(current)
    self._current_container_stats = None


def _require_current_stats(
    processor: Optional[TestCaseProcessor], line: str
) -> ContainerStatsProcessor:
  """Returns the container being processed, or raises if there is none."""
  stats = processor.current_container_stats if processor else None
  if stats is None:
    raise ValueError(f'measurement line outside of a container section: {line!r}')
  return stats


def _process_lines(lines: Iterable[str], output_dir: str) -> None:
  """Parses raw benchmark output and writes one CSV per test case."""
  # It would be nicer to use a ContextManager, but that complicates splitting
  # up the input and iterating through it. This is "good enough".
  processor: Optional[TestCaseProcessor] = None

  for raw_line in lines:
    line = raw_line.strip()

    # A line containing `->` (e.g. `int -> std::string`) starts a new test
    # case; finish writing the previous one first.
    if '->' in line:
      if processor:
        processor.did_finish_container_stats(output_dir)
      processor = TestCaseProcessor(line)
      continue

    match = _HEADER_RE.match(line)
    if match:
      if processor is None:
        raise ValueError(f'container header before any test case: {line!r}')
      processor.did_begin_container_stats(match.group('name'))
      continue

    match = _ITER_RE.match(line)
    if match:
      _require_current_stats(processor, line).did_iterate(
          int(match.group('iter'))
      )
      continue

    match = _ALLOC_RE.match(line)
    if match:
      _require_current_stats(processor, line).did_alloc(
          match.group('alloc_addr'), int(match.group('size'))
      )
      continue

    match = _FREED_RE.match(line)
    if match:
      _require_current_stats(processor, line).did_free(
          match.group('freed_addr')
      )
      continue

    # Anything else is unrelated output and is ignored.

  if processor:
    processor.did_finish_container_stats(output_dir)


def main(argv: Sequence[str]) -> None:
  """Reads raw benchmark output from stdin and writes CSVs to the `-o` dir.

  Args:
    argv: full command line, including the program name at index 0.
  """
  parser = argparse.ArgumentParser(
      description='Processes raw output from containers_memory_usage into CSVs.'
  )
  parser.add_argument(
      '-o', help='directory to write CSV files to', required=True
  )
  # Parse the arguments that were actually passed in rather than implicitly
  # falling back to sys.argv.
  args = parser.parse_args(argv[1:])

  _process_lines(sys.stdin, args.o)


if __name__ == '__main__':
  main(sys.argv)