#!/usr/bin/env python3

# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import dataclasses
import datetime
import glob
import json
import logging
import re
import shutil
import subprocess
import textwrap
from pathlib import Path
from typing import Any, Iterable

from bp2build_metrics_proto.bp2build_metrics_pb2 import Bp2BuildMetrics
from metrics_proto.metrics_pb2 import MetricsBase
from metrics_proto.metrics_pb2 import PerfInfo
from metrics_proto.metrics_pb2 import SoongBuildMetrics

import util

@dataclasses.dataclass
class PerfInfoOrEvent:
  """
  A duck-typed union of `soong_build_metrics.PerfInfo` and
  `soong_build_bp2build_metrics.Event` protobuf message types
  """
  name: str
  real_time: datetime.timedelta
  start_time: datetime.datetime
  description: str = ''  # Bp2BuildMetrics#Event doesn't have description

  def __post_init__(self):
    # The protos carry both time fields as integer counts (divided by 1000
    # into microseconds below, i.e. presumably nanoseconds). Normalize them
    # into datetime types; non-int values are assumed already converted.
    if isinstance(self.real_time, int):
      micros = self.real_time / 1000
      self.real_time = datetime.timedelta(microseconds=micros)
    if isinstance(self.start_time, int):
      unix_epoch = datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
      self.start_time = unix_epoch + datetime.timedelta(
          microseconds=self.start_time / 1000)
# File names of the metrics protobufs the build writes into the out dir;
# read_pbs() parses them into MetricsBase, SoongBuildMetrics and
# Bp2BuildMetrics respectively.
SOONG_PB = 'soong_metrics'
SOONG_BUILD_PB = 'soong_build_metrics.pb'
BP2BUILD_PB = 'bp2build_metrics.pb'
def _copy_pbs_to(d: Path):
  """Best-effort copy of the metrics pb files from the out dir into `d`.

  Files that do not exist in the out dir are silently skipped.
  """
  out_dir = util.get_out_dir()
  for pb_name in (SOONG_PB, SOONG_BUILD_PB, BP2BUILD_PB):
    src = out_dir.joinpath(pb_name)
    if src.exists():
      shutil.copy(src, d.joinpath(pb_name))
def archive_run(d: Path, build_info: dict[str, any]):
  """Archives the metrics pb files plus `build_info` (as JSON) under `d`."""
  _copy_pbs_to(d)
  d.joinpath(util.BUILD_INFO_JSON).write_text(
      json.dumps(build_info, indent=True))
def read_pbs(d: Path) -> dict[str, str]:
  """
  Reads the soong/soong_build/bp2build metrics pb files under `d` and returns
  a dict mapping 'event_name/description' to the event's duration formatted
  by util.hhmmss, with events ordered by start time.

  Soong_build event names may contain "mixed_build" event. To normalize the
  event names between mixed builds and soong-only build, convert
    `soong_build/soong_build.xyz` and `soong_build/soong_build.mixed_build.xyz`
  both to simply `soong_build/*.xyz`
  """
  soong_pb = d.joinpath(SOONG_PB)
  soong_build_pb = d.joinpath(SOONG_BUILD_PB)
  bp2build_pb = d.joinpath(BP2BUILD_PB)

  # Accumulates events from all three pb files; appended to by the closures
  # below.
  events: list[PerfInfoOrEvent] = []

  def extract_perf_info(root_obj):
    # Duck-typed scan: walk every non-dunder attribute of the message and
    # collect any iterable of PerfInfo items. Checking only the first item's
    # type (then `break`) assumes repeated proto fields are homogeneous.
    for field_name in dir(root_obj):
      if field_name.startswith('__'):
        continue
      field_value = getattr(root_obj, field_name)
      if isinstance(field_value, Iterable):
        for item in field_value:
          if not isinstance(item, PerfInfo):
            break
          events.append(
            PerfInfoOrEvent(item.name, item.real_time, item.start_time,
                            item.description))

  if soong_pb.exists():
    metrics_base = MetricsBase()
    with open(soong_pb, "rb") as f:
      metrics_base.ParseFromString(f.read())
    extract_perf_info(metrics_base)

  if soong_build_pb.exists():
    soong_build_metrics = SoongBuildMetrics()
    with open(soong_build_pb, "rb") as f:
      soong_build_metrics.ParseFromString(f.read())
    extract_perf_info(soong_build_metrics)

  if bp2build_pb.exists():
    bp2build_metrics = Bp2BuildMetrics()
    with open(bp2build_pb, "rb") as f:
      bp2build_metrics.ParseFromString(f.read())
    # Bp2BuildMetrics events have no description; PerfInfoOrEvent defaults
    # it to ''.
    for event in bp2build_metrics.events:
      events.append(
        PerfInfoOrEvent(event.name, event.real_time, event.start_time, ''))

  events.sort(key=lambda e: e.start_time)

  def normalize(desc: str) -> str:
    # Rewrite a leading 'soong_build'/'mixed_build' in the description to '*'
    # so mixed and soong-only runs produce identical column names.
    return re.sub(r'^(?:soong_build|mixed_build)', '*', desc)

  return {f'{m.name}/{normalize(m.description)}': util.hhmmss(m.real_time) for m
          in events}
# One build run's data: build_info fields merged with per-event durations.
# BUGFIX: was `dict[str, any]` — `any` is the builtin function, not a type;
# `typing.Any` is what was meant (runtime behavior was identical, but type
# checkers reject `any` in a type expression).
Row = dict[str, Any]
139def _get_column_headers(rows: list[Row], allow_cycles: bool) -> list[str]:
140  """
141  Basically a topological sort or column headers. For each Row, the column order
142  can be thought of as a partial view of a chain of events in chronological
143  order. It's a partial view because not all events may have needed to occur for
144  a build.
145  """
146
147  @dataclasses.dataclass
148  class Column:
149    header: str
150    indegree: int
151    nexts: set[str]
152
153    def __str__(self):
154      return f'#{self.indegree}->{self.header}->{self.nexts}'
155
156    def dfs(self, target: str, visited: set[str] = None) -> list[str]:
157      if not visited:
158        visited = set()
159      if target == self.header and self.header in visited:
160        return [self.header]
161      for n in self.nexts:
162        if n in visited:
163          continue
164        visited.add(n)
165        next_col = all_cols[n]
166        path = next_col.dfs(target, visited)
167        if path:
168          return [self.header, *path]
169      return []
170
171  all_cols: dict[str, Column] = {}
172  for row in rows:
173    prev_col = None
174    for col in row:
175      if col not in all_cols:
176        column = Column(col, 0, set())
177        all_cols[col] = column
178      if prev_col is not None and col not in prev_col.nexts:
179        all_cols[col].indegree += 1
180        prev_col.nexts.add(col)
181      prev_col = all_cols[col]
182
183  acc = []
184  entries = [c for c in all_cols.values()]
185  while len(entries) > 0:
186    # sorting alphabetically to break ties for concurrent events
187    entries.sort(key=lambda c: c.header, reverse=True)
188    entries.sort(key=lambda c: c.indegree, reverse=True)
189    entry = entries.pop()
190    # take only one to maintain alphabetical sort
191    if entry.indegree != 0:
192      cycle = '->'.join(entry.dfs(entry.header))
193      s = f'event ordering has a cycle {cycle}'
194      logging.warning(s)
195      if not allow_cycles:
196        raise ValueError(s)
197    acc.append(entry.header)
198    for n in entry.nexts:
199      n = all_cols.get(n)
200      if n is not None:
201        n.indegree -= 1
202      else:
203        if not allow_cycles:
204          raise ValueError(f'unexpected error for: {n}')
205  return acc
206
207
def get_build_info_and_perf(d: Path) -> dict[str, any]:
  """Returns run `d`'s perf metrics merged with its build_info JSON (if any).

  On key collisions the perf metrics win (right-hand side of `|`).
  """
  perf = read_pbs(d)
  info_path = d.joinpath(util.BUILD_INFO_JSON)
  if info_path.exists():
    with open(info_path, 'r') as f:
      return json.load(f) | perf
  return perf
def tabulate_metrics_csv(log_dir: Path):
  """Collects each run directory's metrics under `log_dir` into one CSV."""
  run_dirs = glob.glob(f'{util.RUN_DIR_PREFIX}*', root_dir=log_dir)
  # Sort runs numerically by suffix; the `1 +` skips one separator character
  # after the prefix (presumably '-'; confirm against util.RUN_DIR_PREFIX use).
  run_dirs.sort(key=lambda name: int(name[1 + len(util.RUN_DIR_PREFIX):]))
  rows: list[dict[str, any]] = [
      get_build_info_and_perf(log_dir.joinpath(name)) for name in run_dirs]

  headers: list[str] = _get_column_headers(rows, allow_cycles=True)

  def to_csv_line(row):
    # Falsy cells (missing, '', 0) render as empty fields.
    return ','.join(str(row.get(col) or '') for col in headers)

  lines = [','.join(headers)]
  lines.extend(to_csv_line(row) for row in rows)

  with open(log_dir.joinpath(util.METRICS_TABLE), mode='wt') as f:
    f.writelines(f'{line}\n' for line in lines)
def display_tabulated_metrics(log_dir: Path):
  """Logs the tabulated metrics for `log_dir` along with viewing tips."""
  cmd_str = util.get_cmd_to_display_tabulated_metrics(log_dir)
  # NOTE(review): shell=True on a util-built command string; assumed to contain
  # no untrusted input — verify in util.
  output = subprocess.check_output(cmd_str, shell=True, text=True)
  # The %s placeholders are filled lazily by logging from the trailing args,
  # not by the f-string (which contains no substitutions).
  logging.info(textwrap.dedent(f'''
  %s
  TIPS:
  1 To view key metrics in metrics.csv:
    %s
  2 To view column headers:
    %s
    '''), output, cmd_str, util.get_csv_columns_cmd(log_dir))