• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright 2023 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5#
6# Processes the raw output from containers_memory_benchmark into CSV files.
7# Each CSV file contains the results for all tested container types for a given
8# key and value type.
9#
10# Usage:
11# $ out/release/containers_memory_benchmark &> output.txt
12# $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results
13
14import argparse
15from collections.abc import Sequence
16import csv
17import os.path
18import re
19import sys
20from typing import Optional
21
22
# Patterns for the record lines found in the benchmark output. Each one is
# applied with `.match()` to a whitespace-stripped line, so it only matches at
# the start of the line.
#
# `===== <name> =====`: starts the records for the container type <name>.
_HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
# `iteration <n>`: iteration marker; <n> is a decimal integer.
_ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
# `alloc address <addr> size <bytes>`: an allocation of <bytes> bytes at <addr>.
_ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
# `freed address <addr>`: the allocation at <addr> was released.
_FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')
27
28
class ContainerStatsProcessor:
  """Accumulates per-iteration heap usage for a single container type.

  Callers feed it the parsed benchmark records in order: `did_iterate()` for
  each iteration marker and `did_alloc()`/`did_free()` for the allocation
  records in between. The number of live bytes at the point an iteration is
  flushed is recorded in `data` under that iteration's number.
  """

  def __init__(self, name: str):
    # e.g. base::flat_map
    self._name = name
    # current number of elements in the container. None until the first
    # iteration marker has been seen.
    self._n: Optional[int] = None
    # map of address to size for currently active allocations. Needed because
    # the free handler only records an address, and not a size.
    self._addr_to_size: dict[str, int] = {}
    # running count of the number of bytes needed at the current iteration
    self._running_size = 0
    # map of container size to number of bytes used to store a container of that
    # size. Keys are expected to be contiguous from 0 to the total iteration
    # count.
    self._data: dict[int, int] = {}

  @property
  def name(self) -> str:
    """The container type name passed to the constructor."""
    return self._name

  @property
  def data(self) -> dict[int, int]:
    """Map of iteration number to live bytes recorded for that iteration."""
    return self._data

  def did_alloc(self, addr: str, size: int) -> None:
    """Records an allocation of `size` bytes at `addr`."""
    self._addr_to_size[addr] = size
    self._running_size += size

  def did_free(self, addr: str) -> None:
    """Records that the allocation at `addr` was freed.

    Raises:
      KeyError: if `addr` is not a currently tracked allocation.
    """
    size = self._addr_to_size.pop(addr)
    self._running_size -= size

  def did_iterate(self, n: int) -> None:
    """Starts iteration `n`, first recording the result of the previous one."""
    self.flush_current_iteration_if_needed()
    self._n = n

  def flush_current_iteration_if_needed(self) -> None:
    """Stores the running byte count under the current iteration number.

    Does nothing if no iteration marker has been seen yet; otherwise a
    container with no iterations would end up with a bogus `None` key in
    `data`.
    """
    if self._n is None:
      return
    self._data[self._n] = self._running_size
69
70
class TestCaseProcessor:
  """Collects the stats of every container type tested for one test case.

  A test case is one key/value type combination. Containers are processed one
  at a time; once all of them have been seen, `did_finish_container_stats()`
  writes the combined results to `<name>.csv`.
  """

  def __init__(self, name: str):
    # e.g. int -> std::string
    self._name = name
    # containers for which all allocation data has been processed and finalized.
    self._finalized_stats: list[ContainerStatsProcessor] = []
    # the current container being processed.
    self._current_container_stats: Optional[ContainerStatsProcessor] = None

  @property
  def current_container_stats(self):
    """The container currently being processed, or None if there is none."""
    return self._current_container_stats

  def did_begin_container_stats(self, container_type: str):
    """Finalizes the current container, if any, and starts `container_type`."""
    self._finalize_current_container_stats_if_needed()
    self._current_container_stats = ContainerStatsProcessor(container_type)

  def did_finish_container_stats(self, output_dir: str):
    """Finalizes the current container and writes `<name>.csv`.

    The CSV has one `size` column followed by one column per container type,
    and one row per iteration number.

    Args:
      output_dir: existing directory in which the CSV file is created.

    Raises:
      ValueError: if the processed containers do not all have the same set of
        iteration numbers. Nothing is written in that case.
    """
    self._finalize_current_container_stats_if_needed()

    # In theory, all processed containers should have the same keys. Verify
    # that before touching the filesystem so a mismatch never leaves a
    # partially written CSV behind. `None` (rather than an empty list) marks
    # "no reference yet" so that a container with no data is checked too.
    expected_keys = None
    for stats in self._finalized_stats:
      keys = sorted(stats.data)
      if expected_keys is None:
        expected_keys = keys
      elif keys != expected_keys:
        raise ValueError(
            f'{self._name}: iterations recorded for {stats.name} do not match '
            f'those of {self._finalized_stats[0].name}'
        )

    with open(
        os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
    ) as f:
      writer = csv.writer(f)
      # First the column headers...
      writer.writerow(
          ['size'] + [stats.name for stats in self._finalized_stats]
      )
      # ...then one row per iteration.
      for key in expected_keys or []:
        writer.writerow(
            [key] + [stats.data[key] for stats in self._finalized_stats]
        )

  def _finalize_current_container_stats_if_needed(self):
    """Flushes the current container and moves it to the finalized list."""
    if self._current_container_stats:
      self._current_container_stats.flush_current_iteration_if_needed()
      self._finalized_stats.append(self._current_container_stats)
      self._current_container_stats = None
117
118
def _require_stats(processor, line: str):
  """Returns the container stats that the record in `line` belongs to.

  Raises:
    ValueError: if no test case or no container header precedes `line`.
  """
  stats = processor.current_container_stats if processor else None
  if stats is None:
    raise ValueError(f'record found before any container header: {line!r}')
  return stats


def main(argv: Sequence[str]) -> None:
  """Reads benchmark output from stdin and writes one CSV per test case.

  Args:
    argv: full command line, including the program name at index 0.

  Raises:
    ValueError: if a container header or an iteration/alloc/freed record
      appears before the line that introduces its test case or container.
  """
  parser = argparse.ArgumentParser(
      description='Processes raw output from containers_memory_usage into CSVs.'
  )
  parser.add_argument(
      '-o', help='directory to write CSV files to', required=True
  )
  # Parse the arguments that were actually passed in instead of implicitly
  # re-reading sys.argv. argv[0] is the program name.
  args = parser.parse_args(argv[1:])
  os.makedirs(args.o, exist_ok=True)

  # It would be nicer to use a ContextManager, but that complicates splitting up
  # the input and iterating through it. This is "good enough".
  processor: Optional[TestCaseProcessor] = None

  for line in sys.stdin:
    line = line.strip()
    # A line containing '->' (e.g. `int -> std::string`) starts a new test
    # case; the previous one, if any, is complete and can be written out.
    if '->' in line:
      if processor:
        processor.did_finish_container_stats(args.o)
      processor = TestCaseProcessor(line)
      continue

    match = _HEADER_RE.match(line)
    if match:
      if processor is None:
        raise ValueError(f'container header before any test case: {line!r}')
      processor.did_begin_container_stats(match.group('name'))
      continue

    match = _ITER_RE.match(line)
    if match:
      _require_stats(processor, line).did_iterate(int(match.group('iter')))
      continue

    match = _ALLOC_RE.match(line)
    if match:
      _require_stats(processor, line).did_alloc(
          match.group('alloc_addr'), int(match.group('size'))
      )
      continue

    match = _FREED_RE.match(line)
    if match:
      _require_stats(processor, line).did_free(match.group('freed_addr'))
      continue

    # Anything else is unrelated output and is ignored.

  if processor:
    processor.did_finish_container_stats(args.o)
163
164
# Only process stdin when run as a script, not when imported as a module.
if __name__ == '__main__':
  main(sys.argv)
167