#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Processes the raw output from containers_memory_usage into CSV files. Each CSV
# file contains the results for all tested container types for a given key and
# value type.
#
# Usage:
# $ out/release/containers_memory_benchmark &> output.txt
# $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results

import argparse
from collections.abc import Sequence
import csv
import os.path
import re
import sys
from typing import Optional


_HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
_ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
_ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
_FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')


class ContainerStatsProcessor:
  """Accumulates the live allocation size per iteration for one container.

  Feed it the parsed events in input order (`did_iterate`, `did_alloc`,
  `did_free`); `data` then maps each iteration number to the number of bytes
  that were live when that iteration ended.
  """

  def __init__(self, name: str):
    # e.g. base::flat_map
    self._name = name
    # current number of elements in the container; None until the first
    # iteration marker has been seen.
    self._n: Optional[int] = None
    # map of address to size for currently active allocations. Needed because
    # the free handler only records an address, and not a size.
    self._addr_to_size: dict[str, int] = {}
    # running count of the number of bytes needed at the current iteration
    self._running_size = 0
    # map of container size to number of bytes used to store a container of that
    # size. Keys are expected to be contiguous from 0 to the total iteration
    # count.
    self._data: dict[int, int] = {}

  @property
  def name(self) -> str:
    """The container type name this processor was created with."""
    return self._name

  @property
  def data(self) -> dict[int, int]:
    """Map of iteration number to live bytes recorded for that iteration."""
    return self._data

  def did_alloc(self, addr: str, size: int) -> None:
    """Records a new live allocation of `size` bytes at `addr`."""
    self._addr_to_size[addr] = size
    self._running_size += size

  def did_free(self, addr: str) -> None:
    """Records that the allocation at `addr` was released.

    Raises:
      KeyError: if `addr` is not a currently tracked allocation.
    """
    self._running_size -= self._addr_to_size.pop(addr)

  def did_iterate(self, n: int) -> None:
    """Closes out the previous iteration (if any) and starts iteration `n`."""
    self.flush_current_iteration_if_needed()
    self._n = n

  def flush_current_iteration_if_needed(self) -> None:
    """Stores the running size under the current iteration number.

    Does nothing when no iteration has started yet, so a container section
    without any iteration markers does not produce a bogus `None` entry.
    """
    if self._n is None:
      return
    self._data[self._n] = self._running_size


class TestCaseProcessor:
  """Collects per-container stats for one test case and writes them as a CSV."""

  def __init__(self, name: str):
    # e.g. int -> std::string
    self._name = name
    # containers for which all allocation data has been processed and finalized.
    self._finalized_stats: list[ContainerStatsProcessor] = []
    # the current container being processed.
    self._current_container_stats: Optional[ContainerStatsProcessor] = None

  @property
  def current_container_stats(self) -> Optional[ContainerStatsProcessor]:
    """The container currently receiving events, or None if there is none."""
    return self._current_container_stats

  def did_begin_container_stats(self, container_type: str) -> None:
    """Finalizes any container in progress and starts `container_type`."""
    self._finalize_current_container_stats_if_needed()
    self._current_container_stats = ContainerStatsProcessor(container_type)

  def did_finish_container_stats(self, output_dir: str) -> None:
    """Finalizes the container in progress and writes `<name>.csv`.

    The CSV has one `size` column followed by one column per container type,
    and one row per iteration number.

    Args:
      output_dir: directory in which the CSV file is created.
    """
    self._finalize_current_container_stats_if_needed()

    # In theory, all processed containers should have the same number of keys,
    # but assert just to be sure. A None sentinel (rather than an empty list)
    # marks "no container seen yet", so that a container with no data is
    # compared against the others instead of being silently skipped. This is
    # checked before the file is opened so a mismatch leaves no partial CSV.
    keys: Optional[list[int]] = None
    for stats in self._finalized_stats:
      stats_keys = sorted(stats.data)
      if keys is None:
        keys = stats_keys
      else:
        assert keys == stats_keys, (
            f'{stats.name} has different iterations than '
            f'{self._finalized_stats[0].name}'
        )

    with open(
        os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
    ) as f:
      writer = csv.writer(f)
      # First the column headers...
      writer.writerow(
          ['size'] + [stats.name for stats in self._finalized_stats]
      )
      # ...then one row per iteration.
      for key in keys or []:
        writer.writerow(
            [key] + [stats.data[key] for stats in self._finalized_stats]
        )

  def _finalize_current_container_stats_if_needed(self) -> None:
    """Flushes the container in progress and moves it to the finalized list."""
    if self._current_container_stats is None:
      return
    self._current_container_stats.flush_current_iteration_if_needed()
    self._finalized_stats.append(self._current_container_stats)
    self._current_container_stats = None


def main(argv: Sequence[str]) -> None:
  """Reads benchmark output from stdin and writes one CSV per test case.

  Args:
    argv: full command line, including the program name at index 0.
  """
  parser = argparse.ArgumentParser(
      description='Processes raw output from containers_memory_usage into CSVs.'
  )
  parser.add_argument(
      '-o', help='directory to write CSV files to', required=True
  )
  # Parse the arguments that were actually passed in rather than implicitly
  # falling back to sys.argv.
  args = parser.parse_args(argv[1:])

  # It would be nicer to use a ContextManager, but that complicates splitting up
  # the input and iterating through it. This is "good enough".
  processor: Optional[TestCaseProcessor] = None

  # The code below dereferences `processor` and its current container without
  # checking, so the input must provide a test case line before any container
  # header, and a header before any iteration/alloc/freed line.
  for line in sys.stdin:
    line = line.strip()
    # A line containing '->' (e.g. "int -> std::string") starts a new test case.
    if '->' in line:
      if processor:
        processor.did_finish_container_stats(args.o)
      processor = TestCaseProcessor(line)
      continue

    match = _HEADER_RE.match(line)
    if match:
      processor.did_begin_container_stats(match.group('name'))
      continue

    match = _ITER_RE.match(line)
    if match:
      processor.current_container_stats.did_iterate(int(match.group('iter')))
      continue

    match = _ALLOC_RE.match(line)
    if match:
      processor.current_container_stats.did_alloc(
          match.group('alloc_addr'), int(match.group('size'))
      )
      continue

    match = _FREED_RE.match(line)
    if match:
      processor.current_container_stats.did_free(match.group('freed_addr'))
      continue

  # Write out the last test case, which has no following '->' line to end it.
  if processor:
    processor.did_finish_container_stats(args.o)


if __name__ == '__main__':
  main(sys.argv)