1 //! This module is responsible for collecting metrics profiling information for the current build 2 //! and dumping it to disk as JSON, to aid investigations on build and CI performance. 3 //! 4 //! As this module requires additional dependencies not present during local builds, it's cfg'd 5 //! away whenever the `build.metrics` config option is not set to `true`. 6 7 use crate::builder::{Builder, Step}; 8 use crate::util::t; 9 use crate::Build; 10 use serde_derive::{Deserialize, Serialize}; 11 use std::cell::RefCell; 12 use std::fs::File; 13 use std::io::BufWriter; 14 use std::time::{Duration, Instant, SystemTime}; 15 use sysinfo::{CpuExt, System, SystemExt}; 16 17 // Update this number whenever a breaking change is made to the build metrics. 18 // 19 // The output format is versioned for two reasons: 20 // 21 // - The metadata is intended to be consumed by external tooling, and exposing a format version 22 // helps the tools determine whether they're compatible with a metrics file. 23 // 24 // - If a developer enables build metrics in their local checkout, making a breaking change to the 25 // metrics format would result in a hard-to-diagnose error message when an existing metrics file 26 // is not compatible with the new changes. With a format version number, bootstrap can discard 27 // incompatible metrics files instead of appending metrics to them. 28 // 29 // Version changelog: 30 // 31 // - v0: initial version 32 // - v1: replaced JsonNode::Test with JsonNode::TestSuite 33 // 34 const CURRENT_FORMAT_VERSION: usize = 1; 35 36 pub(crate) struct BuildMetrics { 37 state: RefCell<MetricsState>, 38 } 39 40 impl BuildMetrics { init() -> Self41 pub(crate) fn init() -> Self { 42 let state = RefCell::new(MetricsState { 43 finished_steps: Vec::new(), 44 running_steps: Vec::new(), 45 46 system_info: System::new(), 47 timer_start: None, 48 invocation_timer_start: Instant::now(), 49 invocation_start: SystemTime::now(), 50 }); 51 52 BuildMetrics { state } 53 } 54 enter_step<S: Step>(&self, step: &S, builder: &Builder<'_>)55 pub(crate) fn enter_step<S: Step>(&self, step: &S, builder: &Builder<'_>) { 56 // Do not record dry runs, as they'd be duplicates of the actual steps. 57 if builder.config.dry_run() { 58 return; 59 } 60 61 let mut state = self.state.borrow_mut(); 62 63 // Consider all the stats gathered so far as the parent's. 64 if !state.running_steps.is_empty() { 65 self.collect_stats(&mut *state); 66 } 67 68 state.system_info.refresh_cpu(); 69 state.timer_start = Some(Instant::now()); 70 71 state.running_steps.push(StepMetrics { 72 type_: std::any::type_name::<S>().into(), 73 debug_repr: format!("{step:?}"), 74 75 cpu_usage_time_sec: 0.0, 76 duration_excluding_children_sec: Duration::ZERO, 77 78 children: Vec::new(), 79 test_suites: Vec::new(), 80 }); 81 } 82 exit_step(&self, builder: &Builder<'_>)83 pub(crate) fn exit_step(&self, builder: &Builder<'_>) { 84 // Do not record dry runs, as they'd be duplicates of the actual steps. 85 if builder.config.dry_run() { 86 return; 87 } 88 89 let mut state = self.state.borrow_mut(); 90 91 self.collect_stats(&mut *state); 92 93 let step = state.running_steps.pop().unwrap(); 94 if state.running_steps.is_empty() { 95 state.finished_steps.push(step); 96 state.timer_start = None; 97 } else { 98 state.running_steps.last_mut().unwrap().children.push(step); 99 100 // Start collecting again for the parent step. 101 state.system_info.refresh_cpu(); 102 state.timer_start = Some(Instant::now()); 103 } 104 } 105 begin_test_suite(&self, metadata: TestSuiteMetadata, builder: &Builder<'_>)106 pub(crate) fn begin_test_suite(&self, metadata: TestSuiteMetadata, builder: &Builder<'_>) { 107 // Do not record dry runs, as they'd be duplicates of the actual steps. 108 if builder.config.dry_run() { 109 return; 110 } 111 112 let mut state = self.state.borrow_mut(); 113 let step = state.running_steps.last_mut().unwrap(); 114 step.test_suites.push(TestSuite { metadata, tests: Vec::new() }); 115 } 116 record_test(&self, name: &str, outcome: TestOutcome, builder: &Builder<'_>)117 pub(crate) fn record_test(&self, name: &str, outcome: TestOutcome, builder: &Builder<'_>) { 118 // Do not record dry runs, as they'd be duplicates of the actual steps. 119 if builder.config.dry_run() { 120 return; 121 } 122 123 let mut state = self.state.borrow_mut(); 124 let step = state.running_steps.last_mut().unwrap(); 125 126 if let Some(test_suite) = step.test_suites.last_mut() { 127 test_suite.tests.push(Test { name: name.to_string(), outcome }); 128 } else { 129 panic!("metrics.record_test() called without calling metrics.begin_test_suite() first"); 130 } 131 } 132 collect_stats(&self, state: &mut MetricsState)133 fn collect_stats(&self, state: &mut MetricsState) { 134 let step = state.running_steps.last_mut().unwrap(); 135 136 let elapsed = state.timer_start.unwrap().elapsed(); 137 step.duration_excluding_children_sec += elapsed; 138 139 state.system_info.refresh_cpu(); 140 let cpu = state.system_info.cpus().iter().map(|p| p.cpu_usage()).sum::<f32>(); 141 step.cpu_usage_time_sec += cpu as f64 / 100.0 * elapsed.as_secs_f64(); 142 } 143 persist(&self, build: &Build)144 pub(crate) fn persist(&self, build: &Build) { 145 let mut state = self.state.borrow_mut(); 146 assert!(state.running_steps.is_empty(), "steps are still executing"); 147 148 let dest = build.out.join("metrics.json"); 149 150 let mut system = System::new(); 151 system.refresh_cpu(); 152 system.refresh_memory(); 153 154 let system_stats = JsonInvocationSystemStats { 155 cpu_threads_count: system.cpus().len(), 156 cpu_model: system.cpus()[0].brand().into(), 157 158 memory_total_bytes: system.total_memory(), 159 }; 160 let steps = std::mem::take(&mut state.finished_steps); 161 162 // Some of our CI builds consist of multiple independent CI invocations. Ensure all the 163 // previous invocations are still present in the resulting file. 164 let mut invocations = match std::fs::read(&dest) { 165 Ok(contents) => { 166 // We first parse just the format_version field to have the check succeed even if 167 // the rest of the contents are not valid anymore. 168 let version: OnlyFormatVersion = t!(serde_json::from_slice(&contents)); 169 if version.format_version == CURRENT_FORMAT_VERSION { 170 t!(serde_json::from_slice::<JsonRoot>(&contents)).invocations 171 } else { 172 println!( 173 "warning: overriding existing build/metrics.json, as it's not \ 174 compatible with build metrics format version {CURRENT_FORMAT_VERSION}." 175 ); 176 Vec::new() 177 } 178 } 179 Err(err) => { 180 if err.kind() != std::io::ErrorKind::NotFound { 181 panic!("failed to open existing metrics file at {}: {err}", dest.display()); 182 } 183 Vec::new() 184 } 185 }; 186 invocations.push(JsonInvocation { 187 start_time: state 188 .invocation_start 189 .duration_since(SystemTime::UNIX_EPOCH) 190 .unwrap() 191 .as_secs(), 192 duration_including_children_sec: state.invocation_timer_start.elapsed().as_secs_f64(), 193 children: steps.into_iter().map(|step| self.prepare_json_step(step)).collect(), 194 }); 195 196 let json = JsonRoot { format_version: CURRENT_FORMAT_VERSION, system_stats, invocations }; 197 198 t!(std::fs::create_dir_all(dest.parent().unwrap())); 199 let mut file = BufWriter::new(t!(File::create(&dest))); 200 t!(serde_json::to_writer(&mut file, &json)); 201 } 202 prepare_json_step(&self, step: StepMetrics) -> JsonNode203 fn prepare_json_step(&self, step: StepMetrics) -> JsonNode { 204 let mut children = Vec::new(); 205 children.extend(step.children.into_iter().map(|child| self.prepare_json_step(child))); 206 children.extend(step.test_suites.into_iter().map(JsonNode::TestSuite)); 207 208 JsonNode::RustbuildStep { 209 type_: step.type_, 210 debug_repr: step.debug_repr, 211 212 duration_excluding_children_sec: step.duration_excluding_children_sec.as_secs_f64(), 213 system_stats: JsonStepSystemStats { 214 cpu_utilization_percent: step.cpu_usage_time_sec * 100.0 215 / step.duration_excluding_children_sec.as_secs_f64(), 216 }, 217 218 children, 219 } 220 } 221 } 222 223 struct MetricsState { 224 finished_steps: Vec<StepMetrics>, 225 running_steps: Vec<StepMetrics>, 226 227 system_info: System, 228 timer_start: Option<Instant>, 229 invocation_timer_start: Instant, 230 invocation_start: SystemTime, 231 } 232 233 struct StepMetrics { 234 type_: String, 235 debug_repr: String, 236 237 cpu_usage_time_sec: f64, 238 duration_excluding_children_sec: Duration, 239 240 children: Vec<StepMetrics>, 241 test_suites: Vec<TestSuite>, 242 } 243 244 #[derive(Serialize, Deserialize)] 245 #[serde(rename_all = "snake_case")] 246 struct JsonRoot { 247 #[serde(default)] // For version 0 the field was not present. 248 format_version: usize, 249 system_stats: JsonInvocationSystemStats, 250 invocations: Vec<JsonInvocation>, 251 } 252 253 #[derive(Serialize, Deserialize)] 254 #[serde(rename_all = "snake_case")] 255 struct JsonInvocation { 256 // Unix timestamp in seconds 257 // 258 // This is necessary to easily correlate this invocation with logs or other data. 259 start_time: u64, 260 duration_including_children_sec: f64, 261 children: Vec<JsonNode>, 262 } 263 264 #[derive(Serialize, Deserialize)] 265 #[serde(tag = "kind", rename_all = "snake_case")] 266 enum JsonNode { 267 RustbuildStep { 268 #[serde(rename = "type")] 269 type_: String, 270 debug_repr: String, 271 272 duration_excluding_children_sec: f64, 273 system_stats: JsonStepSystemStats, 274 275 children: Vec<JsonNode>, 276 }, 277 TestSuite(TestSuite), 278 } 279 280 #[derive(Serialize, Deserialize)] 281 struct TestSuite { 282 metadata: TestSuiteMetadata, 283 tests: Vec<Test>, 284 } 285 286 #[derive(Serialize, Deserialize)] 287 #[serde(tag = "kind", rename_all = "snake_case")] 288 pub(crate) enum TestSuiteMetadata { 289 CargoPackage { 290 crates: Vec<String>, 291 target: String, 292 host: String, 293 stage: u32, 294 }, 295 Compiletest { 296 suite: String, 297 mode: String, 298 compare_mode: Option<String>, 299 target: String, 300 host: String, 301 stage: u32, 302 }, 303 } 304 305 #[derive(Serialize, Deserialize)] 306 pub(crate) struct Test { 307 name: String, 308 #[serde(flatten)] 309 outcome: TestOutcome, 310 } 311 312 #[derive(Serialize, Deserialize)] 313 #[serde(tag = "outcome", rename_all = "snake_case")] 314 pub(crate) enum TestOutcome { 315 Passed, 316 Failed, 317 Ignored { ignore_reason: Option<String> }, 318 } 319 320 #[derive(Serialize, Deserialize)] 321 #[serde(rename_all = "snake_case")] 322 struct JsonInvocationSystemStats { 323 cpu_threads_count: usize, 324 cpu_model: String, 325 326 memory_total_bytes: u64, 327 } 328 329 #[derive(Serialize, Deserialize)] 330 #[serde(rename_all = "snake_case")] 331 struct JsonStepSystemStats { 332 cpu_utilization_percent: f64, 333 } 334 335 #[derive(Deserialize)] 336 struct OnlyFormatVersion { 337 #[serde(default)] // For version 0 the field was not present. 338 format_version: usize, 339 } 340