1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::HashMap;
6 use std::collections::HashSet;
7 use std::fs;
8 use std::hash::Hash;
9 use std::hash::Hasher;
10
11 use argh::FromArgs;
12 use serde::Deserialize;
13 use serde::Serialize;
14 use serde_json::json;
15 use serde_json::to_string_pretty;
16 use serde_json::Number;
17 use serde_json::Value;
18 use uuid::Uuid;
19
/// This tool takes results from Fuchsia performance tests (in Fuchsia's JSON perf test results
/// format) and converts them to the Catapult Dashboard's JSON HistogramSet format.
///
/// See <https://cs.opensource.google/fuchsia/fuchsia/+/main:src/testing/catapult_converter/README.md>
/// for details on arguments that are copied into output
// NOTE(review): argh presumably turns the `///` comments in this struct into the `--help` text,
// so they are user-facing strings; maintainer notes below use plain `//` comments instead.
#[derive(FromArgs)]
struct ConverterArgs {
    // Path that `main` reads in full and parses as a JSON list of `FuchsiaPerf` entries.
    /// input file: perf test results JSON file
    #[argh(option, arg_name = "FILENAME")]
    input: String,

    // When `None`, `main` prints the serialized HistogramSet to stdout instead of writing a file.
    /// output file: Catapult HistogramSet JSON file (default is stdout)
    #[argh(option, arg_name = "FILENAME")]
    output: Option<String>,

    // Stored under the "a_productVersions" diagnostic key, and only when provided.
    /// release version in the format 0.yyyymmdd.a.b if applicable. e.g. 0.20200101.1.2
    #[argh(option, arg_name = "STRING")]
    product_versions: Option<String>,

    // Stored under the "pointId" diagnostic key.
    /// copied into output file as pointId, used to order results from different builds in a graph
    #[argh(option, arg_name = "NUMBER")]
    execution_timestamp_ms: i64,

    // Stored under the "masters" diagnostic key.
    /// copied into output file
    #[argh(option, arg_name = "STRING")]
    masters: String,

    // Stored under the "bots" diagnostic key.
    /// copied into output file
    #[argh(option, arg_name = "STRING")]
    bots: String,

    // Stored under the "logUrls" diagnostic key, paired with the label "Build Log".
    /// copied into output file
    #[argh(option, arg_name = "URL")]
    log_url: String,
}
55
/// Measurement units that may appear in the `unit` field of an input perf result.
///
/// Each variant is deserialized from its own name or from any `alias` listed on it.
#[derive(Deserialize, Debug)]
enum FuchsiaPerfUnit {
    /// Nanoseconds; `convert_unit` rescales these samples to milliseconds.
    #[serde(alias = "nanoseconds", alias = "ns")]
    NanoSeconds,
    /// Milliseconds; passed through unchanged by `convert_unit`.
    #[serde(alias = "milliseconds", alias = "ms")]
    Milliseconds,
    /// Bytes per second; reported as a unitless "bigger is better" value.
    #[serde(alias = "bytes/second")]
    BytesPerSecond,
    /// Bits per second; `convert_unit` divides these samples by 8 to get bytes per second.
    #[serde(alias = "bits/second")]
    BitsPerSecond,
    /// A size in bytes.
    #[serde(alias = "bytes")]
    Bytes,
    /// Frames per second.
    #[serde(alias = "frames/second")]
    FramesPerSecond,
    /// A percentage.
    #[serde(alias = "percent")]
    Percent,
    /// A plain count.
    #[serde(alias = "count")]
    Count,
    /// Watts; no alias is declared, so only the variant name itself is accepted.
    Watts,
}
76
/// Units written to the output histograms.
///
/// Each variant is serialized as the string given in its `rename` attribute; most of those
/// strings also carry an improvement direction suffix (`_smallerIsBetter` / `_biggerIsBetter`).
#[derive(Serialize, Debug)]
enum HistogramUnit {
    /// Time in milliseconds, smaller is better.
    #[serde(rename = "ms_smallerIsBetter")]
    Milliseconds,
    /// Unitless value, bigger is better; `convert_unit` uses this for throughput inputs.
    #[serde(rename = "unitless_biggerIsBetter")]
    UnitlessBiggerIsBetter,
    /// Size in bytes, smaller is better.
    #[serde(rename = "sizeInBytes_smallerIsBetter")]
    Bytes,
    /// Frequency in Hz, bigger is better; used for frames-per-second inputs.
    #[serde(rename = "Hz_biggerIsBetter")]
    FramesPerSecond,
    /// Percentage, smaller is better.
    #[serde(rename = "n%_smallerIsBetter")]
    Percent,
    /// Plain count, emitted without a direction suffix.
    #[serde(rename = "count")]
    Count,
    /// Power in watts, smaller is better.
    #[serde(rename = "W_smallerIsBetter")]
    Watts,
}
94
/// One entry of the input perf results file; `main` parses the file as a list of these.
#[derive(Deserialize, Debug)]
struct FuchsiaPerf {
    /// Name of the test; may also be supplied under the key `label`.
    #[serde(alias = "label")]
    test_name: String,
    /// Optional metric name; `main` appends it to the histogram name unless it is "real_time".
    metric: Option<String>,
    /// Suite the test belongs to; `main` emits one diagnostic per distinct suite and links it
    /// to each histogram under the "benchmarks" key.
    test_suite: String,
    /// Unit in which `values` are expressed.
    unit: FuchsiaPerfUnit,
    /// Raw samples recorded for this test.
    values: Vec<f64>,
}
104
convert_unit(input_unit: FuchsiaPerfUnit, values: &mut [f64]) -> HistogramUnit105 fn convert_unit(input_unit: FuchsiaPerfUnit, values: &mut [f64]) -> HistogramUnit {
106 match input_unit {
107 FuchsiaPerfUnit::NanoSeconds => {
108 for value in values.iter_mut() {
109 *value /= 1e6;
110 }
111 HistogramUnit::Milliseconds
112 }
113 FuchsiaPerfUnit::Milliseconds => HistogramUnit::Milliseconds,
114 // The Catapult dashboard does not yet support a "bytes per unit time"
115 // unit (of any multiple), and it rejects unknown units, so we report
116 // this as "unitless" here for now.
117 FuchsiaPerfUnit::BytesPerSecond => HistogramUnit::UnitlessBiggerIsBetter,
118 FuchsiaPerfUnit::BitsPerSecond => {
119 // convert to bytes/s to be consistent with bytes/second
120 for value in values.iter_mut() {
121 *value /= 8.0;
122 }
123 HistogramUnit::UnitlessBiggerIsBetter
124 }
125 FuchsiaPerfUnit::Bytes => HistogramUnit::Bytes,
126 FuchsiaPerfUnit::FramesPerSecond => HistogramUnit::FramesPerSecond,
127 FuchsiaPerfUnit::Percent => HistogramUnit::Percent,
128 FuchsiaPerfUnit::Count => HistogramUnit::Count,
129 FuchsiaPerfUnit::Watts => HistogramUnit::Watts,
130 }
131 }
132
/// A diagnostic entry of the output HistogramSet.
///
/// Histograms refer to a diagnostic by its `guid`; the hand-written `Hash` and `PartialEq`
/// impls for this type likewise consider only `guid`.
#[derive(Serialize, Clone, Debug, Eq)]
struct Diagnostic {
    /// Identifier of this diagnostic; `Default` fills it with a random v4 UUID string.
    guid: String,
    /// Serialized under the key `type`; `Default` sets it to "GenericSet".
    // NOTE(review): `default` is a deserialization attribute and this type only derives
    // `Serialize`, so it presumably has no effect here — confirm before relying on it.
    #[serde(rename = "type", default = "GenericSet")]
    diag_type: String,
    /// JSON values carried by this diagnostic.
    values: Vec<Value>,
}
140
141 impl Hash for Diagnostic {
hash<H: Hasher>(&self, state: &mut H)142 fn hash<H: Hasher>(&self, state: &mut H) {
143 self.guid.hash(state);
144 }
145 }
146
147 impl PartialEq<Self> for Diagnostic {
eq(&self, other: &Self) -> bool148 fn eq(&self, other: &Self) -> bool {
149 self.guid == other.guid
150 }
151 }
152
153 impl Default for Diagnostic {
default() -> Self154 fn default() -> Self {
155 Diagnostic {
156 guid: Uuid::new_v4().to_string(),
157 diag_type: "GenericSet".to_string(),
158 values: Vec::default(),
159 }
160 }
161 }
162
/// A histogram entry of the output HistogramSet, summarising the samples of one test.
#[derive(Serialize, Debug)]
struct Histogram {
    /// Name of the histogram, supplied by the caller of `Histogram::new`.
    name: String,
    /// Unit of the summarised samples after conversion by `convert_unit`.
    unit: HistogramUnit,
    /// Free-form description; `Histogram::new` always leaves it empty.
    #[serde(default)]
    description: String,
    /// Maps a diagnostic key (e.g. "bots") to the `guid` of the diagnostic it refers to.
    diagnostics: HashMap<&'static str, String>,
    /// Running statistics, filled by `Histogram::new` in the order
    /// count, max, meanlogs, mean, min, sum, variance.
    // serde_json converts NaN / infinite to null by default
    running: Vec<Option<Number>>,
    /// Identifier of this histogram; `Histogram::new` sets it to a random v4 UUID string.
    guid: String,
    /// Serialized as `maxNumSampleValues`; set to the number of input samples.
    #[serde(rename = "maxNumSampleValues")]
    max_num_sample_values: u64,
    /// Serialized as `numNans`; `Histogram::new` currently always writes 0.
    #[serde(rename = "numNans", default)]
    num_nans: u64,
}
178
179 impl Histogram {
new( test_name: &str, unit: FuchsiaPerfUnit, diagnostics: HashMap<&'static str, String>, original_values: Vec<f64>, ) -> Self180 fn new(
181 test_name: &str,
182 unit: FuchsiaPerfUnit,
183 diagnostics: HashMap<&'static str, String>,
184 original_values: Vec<f64>,
185 ) -> Self {
186 let mut values = original_values;
187 let output_unit = convert_unit(unit, &mut values);
188
189 let mut stats: Vec<Option<Number>> = Vec::new();
190 let mean: f64 = values.iter().sum::<f64>() / values.len() as f64;
191
192 // count
193 stats.push(Some(values.len().into()));
194
195 // max
196 stats.push(Number::from_f64(
197 values.iter().cloned().max_by(f64::total_cmp).unwrap(),
198 ));
199
200 // meanlogs
201 stats.push(Number::from_f64(
202 values.iter().map(|x| f64::ln(*x)).sum::<f64>() / values.len() as f64,
203 ));
204
205 // mean
206 stats.push(Number::from_f64(mean));
207
208 // min
209 stats.push(Number::from_f64(
210 values.iter().cloned().min_by(f64::total_cmp).unwrap(),
211 ));
212
213 // sum
214 stats.push(Number::from_f64(values.iter().sum()));
215
216 // variance
217 // Bessel's correction applied. Bessel's correction gives us a better estimation of
218 // the population's variance given a sample of the population.
219 stats.push(Number::from_f64(if values.len() <= 1 {
220 0.0
221 } else {
222 values
223 .iter()
224 .map(|x| (*x - mean) * (*x - mean))
225 .sum::<f64>()
226 / (values.len() - 1) as f64
227 }));
228
229 Histogram {
230 name: test_name.to_string(),
231 unit: output_unit,
232 description: "".to_string(),
233 diagnostics,
234 running: stats,
235 guid: Uuid::new_v4().to_string(),
236 max_num_sample_values: values.len() as u64,
237 // Assume for now that we didn't get any NaN values.
238 num_nans: 0,
239 }
240 }
241 }
242
build_shared_diagnostic_map( args: &ConverterArgs, ) -> (HashMap<&'static str, String>, HashSet<Diagnostic>)243 fn build_shared_diagnostic_map(
244 args: &ConverterArgs,
245 ) -> (HashMap<&'static str, String>, HashSet<Diagnostic>) {
246 let mut diag_map = HashMap::new();
247 let mut diag_set = HashSet::new();
248
249 let diag = Diagnostic {
250 values: vec![json!(args.execution_timestamp_ms)],
251 ..Default::default()
252 };
253 diag_set.insert(diag.clone());
254 diag_map.insert("pointId", diag.guid);
255
256 let diag = Diagnostic {
257 values: vec![json!(args.bots)],
258 ..Default::default()
259 };
260 diag_set.insert(diag.clone());
261 diag_map.insert("bots", diag.guid);
262
263 let diag = Diagnostic {
264 values: vec![json!(args.masters)],
265 ..Default::default()
266 };
267 diag_set.insert(diag.clone());
268 diag_map.insert("masters", diag.guid);
269
270 if let Some(version) = &args.product_versions {
271 let diag = Diagnostic {
272 values: vec![json!(version)],
273 ..Default::default()
274 };
275 diag_set.insert(diag.clone());
276 diag_map.insert("a_productVersions", diag.guid);
277 }
278 let diag = Diagnostic {
279 values: vec![json!(vec!("Build Log".to_string(), args.log_url.clone()))],
280 ..Default::default()
281 };
282 diag_set.insert(diag.clone());
283 diag_map.insert("logUrls", diag.guid);
284 (diag_map, diag_set)
285 }
286
/// One element of the output list built by `main`: either a diagnostic or a histogram.
///
/// NOTE(review): with `#[serde(untagged)]` each element should serialize as the wrapped value
/// itself, without a wrapper naming the variant — confirm against the serde documentation.
#[derive(Serialize, Debug)]
#[serde(untagged)]
enum HistogramSetElement {
    /// A diagnostic entry.
    Diagnostic(Diagnostic),
    /// A histogram entry.
    Histogram(Histogram),
}
293
main()294 fn main() {
295 let args: ConverterArgs = argh::from_env();
296 let content = fs::read_to_string(&args.input)
297 .expect("Failed to read the file, have you specified the correct path?");
298
299 let perf_data: Vec<FuchsiaPerf> =
300 serde_json::from_str(&content).expect("Failed to parse input data file");
301
302 let (shared_diag_map, mut diag_set) = build_shared_diagnostic_map(&args);
303
304 let mut test_suite_guid_map = HashMap::new();
305
306 for test_result in &perf_data {
307 if !test_suite_guid_map.contains_key(&test_result.test_suite) {
308 let new_uuid = Uuid::new_v4().to_string();
309 test_suite_guid_map.insert(test_result.test_suite.clone(), new_uuid.to_owned());
310 diag_set.insert(Diagnostic {
311 values: vec![json!(test_result.test_suite)],
312 guid: new_uuid,
313 ..Default::default()
314 });
315 }
316 }
317
318 let mut output = Vec::<HistogramSetElement>::new();
319 output.extend(
320 diag_set
321 .iter()
322 .cloned()
323 .map(HistogramSetElement::Diagnostic),
324 );
325
326 for test_result in perf_data {
327 let mut diag_map = shared_diag_map.clone();
328 diag_map.insert(
329 "benchmarks",
330 test_suite_guid_map[&test_result.test_suite].clone(),
331 );
332
333 let mut name = test_result.test_name.clone();
334 if let Some(metric) = &test_result.metric {
335 if metric != "real_time" {
336 name += "/";
337 name += metric.as_str();
338 }
339 }
340
341 output.push(HistogramSetElement::Histogram(Histogram::new(
342 name.replace(" ", "_").as_str(),
343 test_result.unit,
344 diag_map,
345 test_result.values,
346 )));
347 }
348
349 let serialized_output = to_string_pretty(&output).expect("Unable to serialize result");
350
351 match &args.output {
352 Some(file_name) => fs::write(file_name, serialized_output).unwrap(),
353 None => println!("{}", serialized_output),
354 }
355 }
356