#!/usr/bin/env python3

# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Reads Soong/bp2build metrics protobufs, archives them per build run, and
tabulates the per-event timings into a CSV for comparison across runs."""

import dataclasses
import datetime
import glob
import json
import logging
import re
import shutil
import subprocess
import textwrap
from pathlib import Path
from typing import Any
from typing import Iterable

from bp2build_metrics_proto.bp2build_metrics_pb2 import Bp2BuildMetrics
from metrics_proto.metrics_pb2 import MetricsBase
from metrics_proto.metrics_pb2 import PerfInfo
from metrics_proto.metrics_pb2 import SoongBuildMetrics

import util


@dataclasses.dataclass
class PerfInfoOrEvent:
  """
  A duck-typed union of `soong_build_metrics.PerfInfo` and
  `soong_build_bp2build_metrics.Event` protobuf message types
  """
  name: str
  real_time: datetime.timedelta
  start_time: datetime.datetime
  description: str = ''  # Bp2BuildMetrics#Event doesn't have description

  def __post_init__(self):
    # The protobufs store both fields as integer nanosecond counts; normalize
    # them to the richer datetime types declared above.
    if isinstance(self.real_time, int):
      self.real_time = datetime.timedelta(microseconds=self.real_time / 1000)
    if isinstance(self.start_time, int):
      epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
      self.start_time = epoch + datetime.timedelta(
          microseconds=self.start_time / 1000)


# Metrics protobuf files emitted under the out dir by Soong / bp2build.
SOONG_PB = 'soong_metrics'
SOONG_BUILD_PB = 'soong_build_metrics.pb'
BP2BUILD_PB = 'bp2build_metrics.pb'


def _copy_pbs_to(d: Path):
  """Copies whichever metrics pb files exist in the out dir into `d`."""
  soong_pb = util.get_out_dir().joinpath(SOONG_PB)
  soong_build_pb = util.get_out_dir().joinpath(SOONG_BUILD_PB)
  bp2build_pb = util.get_out_dir().joinpath(BP2BUILD_PB)
  if soong_pb.exists():
    shutil.copy(soong_pb, d.joinpath(SOONG_PB))
  if soong_build_pb.exists():
    shutil.copy(soong_build_pb, d.joinpath(SOONG_BUILD_PB))
  if bp2build_pb.exists():
    shutil.copy(bp2build_pb, d.joinpath(BP2BUILD_PB))


def archive_run(d: Path, build_info: dict[str, Any]):
  """Archives the metrics pbs and the given build info JSON under `d`."""
  _copy_pbs_to(d)
  with open(d.joinpath(util.BUILD_INFO_JSON), 'w') as f:
    json.dump(build_info, f, indent=True)


def read_pbs(d: Path) -> dict[str, str]:
  """
  Reads metrics data from pb files and archives the file by copying
  them under the log_dir.
  Soong_build event names may contain "mixed_build" event. To normalize the
  event names between mixed builds and soong-only build, convert
  `soong_build/soong_build.xyz` and `soong_build/soong_build.mixed_build.xyz`
  both to simply `soong_build/*.xyz`
  """
  soong_pb = d.joinpath(SOONG_PB)
  soong_build_pb = d.joinpath(SOONG_BUILD_PB)
  bp2build_pb = d.joinpath(BP2BUILD_PB)

  events: list[PerfInfoOrEvent] = []

  def extract_perf_info(root_obj):
    # Scan all public attributes for repeated PerfInfo fields; non-PerfInfo
    # iterables (e.g. string fields) hit the `break` on their first item.
    for field_name in dir(root_obj):
      if field_name.startswith('__'):
        continue
      field_value = getattr(root_obj, field_name)
      if isinstance(field_value, Iterable):
        for item in field_value:
          if not isinstance(item, PerfInfo):
            break
          events.append(
              PerfInfoOrEvent(item.name, item.real_time, item.start_time,
                              item.description))

  if soong_pb.exists():
    metrics_base = MetricsBase()
    with open(soong_pb, "rb") as f:
      metrics_base.ParseFromString(f.read())
    extract_perf_info(metrics_base)

  if soong_build_pb.exists():
    soong_build_metrics = SoongBuildMetrics()
    with open(soong_build_pb, "rb") as f:
      soong_build_metrics.ParseFromString(f.read())
    extract_perf_info(soong_build_metrics)

  if bp2build_pb.exists():
    bp2build_metrics = Bp2BuildMetrics()
    with open(bp2build_pb, "rb") as f:
      bp2build_metrics.ParseFromString(f.read())
    for event in bp2build_metrics.events:
      events.append(
          PerfInfoOrEvent(event.name, event.real_time, event.start_time, ''))

  events.sort(key=lambda e: e.start_time)

  def normalize(desc: str) -> str:
    # Normalize `soong_build...` / `mixed_build...` descriptions to `*...` so
    # mixed and soong-only runs share column headers.
    return re.sub(r'^(?:soong_build|mixed_build)', '*', desc)

  return {f'{m.name}/{normalize(m.description)}': util.hhmmss(m.real_time) for m
          in events}


# One CSV row: maps column header (event name) to a display value.
Row = dict[str, Any]


def _get_column_headers(rows: list[Row], allow_cycles: bool) -> list[str]:
  """
  Basically a topological sort or column headers. For each Row, the column order
  can be thought of as a partial view of a chain of events in chronological
  order. It's a partial view because not all events may have needed to occur for
  a build.
  """

  @dataclasses.dataclass
  class Column:
    header: str
    indegree: int
    nexts: set[str]

    def __str__(self):
      return f'#{self.indegree}->{self.header}->{self.nexts}'

    def dfs(self, target: str, visited: set[str] = None) -> list[str]:
      """Returns a path from this column back to `target`, or [] if none."""
      if visited is None:
        visited = set()
      if target == self.header and self.header in visited:
        return [self.header]
      for n in self.nexts:
        if n in visited:
          continue
        visited.add(n)
        next_col = all_cols[n]
        path = next_col.dfs(target, visited)
        if path:
          return [self.header, *path]
      return []

  # Build the precedence graph: within each row, each column points to the
  # column that follows it, and the follower's indegree is bumped once per
  # distinct predecessor edge.
  all_cols: dict[str, Column] = {}
  for row in rows:
    prev_col = None
    for col in row:
      if col not in all_cols:
        column = Column(col, 0, set())
        all_cols[col] = column
      if prev_col is not None and col not in prev_col.nexts:
        all_cols[col].indegree += 1
        prev_col.nexts.add(col)
      prev_col = all_cols[col]

  # Kahn-style topological sort; ties among ready (indegree 0) columns are
  # broken alphabetically by the double sort below.
  acc = []
  entries = list(all_cols.values())
  while len(entries) > 0:
    # sorting alphabetically to break ties for concurrent events
    entries.sort(key=lambda c: c.header, reverse=True)
    entries.sort(key=lambda c: c.indegree, reverse=True)
    entry = entries.pop()
    # take only one to maintain alphabetical sort
    if entry.indegree != 0:
      # No indegree-0 column left: the precedence graph has a cycle.
      cycle = '->'.join(entry.dfs(entry.header))
      s = f'event ordering has a cycle {cycle}'
      logging.warning(s)
      if not allow_cycles:
        raise ValueError(s)
    acc.append(entry.header)
    for next_header in entry.nexts:
      next_col = all_cols.get(next_header)
      if next_col is not None:
        next_col.indegree -= 1
      elif not allow_cycles:
        # Report the missing key itself (previously this logged `None` because
        # the loop variable had been overwritten by the failed lookup).
        raise ValueError(f'unexpected error for: {next_header}')
  return acc


def get_build_info_and_perf(d: Path) -> dict[str, Any]:
  """Merges a run's archived build-info JSON (if any) with its pb timings."""
  perf = read_pbs(d)
  build_info_json = d.joinpath(util.BUILD_INFO_JSON)
  if not build_info_json.exists():
    return perf
  with open(build_info_json, 'r') as f:
    build_info = json.load(f)
  return build_info | perf


def tabulate_metrics_csv(log_dir: Path):
  """Writes one CSV summarizing all archived runs under `log_dir`."""
  rows: list[dict[str, Any]] = []
  dirs = glob.glob(f'{util.RUN_DIR_PREFIX}*', root_dir=log_dir)
  # Sort run dirs by their numeric suffix so rows appear in run order.
  dirs.sort(key=lambda x: int(x[1 + len(util.RUN_DIR_PREFIX):]))
  for d in dirs:
    d = log_dir.joinpath(d)
    row = get_build_info_and_perf(d)
    rows.append(row)

  headers: list[str] = _get_column_headers(rows, allow_cycles=True)

  def row2line(r):
    return ','.join([str(r.get(col) or '') for col in headers])

  lines = [','.join(headers)]
  lines.extend(row2line(r) for r in rows)

  with open(log_dir.joinpath(util.METRICS_TABLE), mode='wt') as f:
    f.writelines(f'{line}\n' for line in lines)


def display_tabulated_metrics(log_dir: Path):
  """Runs the display command from `util` and logs its output with tips."""
  cmd_str = util.get_cmd_to_display_tabulated_metrics(log_dir)
  # NOTE: shell=True is required because util supplies a full shell command
  # string; it is trusted, locally-generated input.
  output = subprocess.check_output(cmd_str, shell=True, text=True)
  logging.info(textwrap.dedent(f'''
  %s
  TIPS:
  1 To view key metrics in metrics.csv:
    %s
  2 To view column headers:
    %s
  '''), output, cmd_str, util.get_csv_columns_cmd(log_dir))