1 // Copyright 2024 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // A utility to compare two runs of a style perftest, in the perftest
6 // output format, and compute confidence intervals. The simplest way
7 // to get the right information is to build a binary before and after
8 // changes and then run it continuously every-other for a while until
9 // you feel you have enough data:
10 //
11 // rm -f old.txt new.txt; \
12 // while :; do
13 // taskset -c 2,4,6,8 ./out/Release/blink_perf___old \
14 // --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a old.txt;
15 // taskset -c 2,4,6,8 ./out/Release/blink_perf_tests \
16 // --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a new.txt;
17 // done
18 //
19 // and then run ./out/Release/compare_blink_perf old.txt new.txt.
20 // (Possibly under watch -n 1 if you want to look at data as it
21 // comes in, though beware of p-hacking.)
22 //
23 // TODO(sesse): Consider whether we should remove the first few
24 // runs, as they are frequently outliers.
25
26 #include <stdio.h>
27 #include <stdlib.h>
28
29 #include <algorithm>
30 #include <string>
31 #include <unordered_map>
32 #include <utility>
33 #include <vector>
34
35 #include "base/rand_util.h"
36 #include "base/strings/string_number_conversions.h"
37 #include "testing/perf/confidence/ratio_bootstrap_estimator.h"
38
39 #ifdef UNSAFE_BUFFERS_BUILD
40 // Not used with untrusted inputs.
41 #pragma allow_unsafe_buffers
42 #endif
43
44 using std::max;
45 using std::min;
46 using std::numeric_limits;
47 using std::pair;
48 using std::sort;
49 using std::string;
50 using std::unordered_map;
51 using std::vector;
52
53 namespace {
54
BeautifyCategory(const string & category)55 string BeautifyCategory(const string& category) {
56 if (category == "BlinkStyleInitialCalcTime") {
57 return "Initial style (µs)";
58 } else if (category == "BlinkStyleRecalcTime") {
59 return "Recalc style (µs)";
60 } else if (category == "BlinkStyleParseTime") {
61 return "Parse (µs)";
62 } else {
63 return category;
64 }
65 }
66
CodeUnitCompareIgnoringASCIICaseLessThan(const string & a,const string & b)67 bool CodeUnitCompareIgnoringASCIICaseLessThan(const string& a,
68 const string& b) {
69 return lexicographical_compare(
70 a.begin(), a.end(), b.begin(), b.end(),
71 [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); });
72 }
73
74 // The structure is e.g. BlinkStyleParseTime -> Video -> [100 us, 90 us, ...]
ReadFile(const char * filename)75 unordered_map<string, unordered_map<string, vector<double>>> ReadFile(
76 const char* filename) {
77 unordered_map<string, unordered_map<string, vector<double>>> measurements;
78
79 FILE* fp = fopen(filename, "r");
80 if (fp == nullptr) {
81 perror(filename);
82 exit(1);
83 }
84 while (!feof(fp)) {
85 char buf[4096];
86 if (fgets(buf, sizeof(buf), fp) == nullptr) {
87 break;
88 }
89 string str(buf);
90 if (str.length() > 1 && str[str.length() - 1] == '\n') {
91 str.resize(str.length() - 1);
92 }
93 if (str.length() > 1 && str[str.length() - 1] == '\r') {
94 str.resize(str.length() - 1);
95 }
96
97 // A result line looks like: *RESULT BlinkStyleParseTime: Video= 11061 us
98 vector<string> cols{""};
99 for (char ch : str) {
100 if (ch == ' ') {
101 cols.push_back("");
102 } else {
103 cols.back().push_back(ch);
104 }
105 }
106 if (cols.size() != 5 || cols[0] != "*RESULT" || !cols[1].ends_with(":") ||
107 !cols[2].ends_with("=") || cols[4] != "us") {
108 continue;
109 }
110
111 string category = cols[1];
112 category.resize(category.length() - 1);
113 category = BeautifyCategory(category);
114
115 string benchmark = cols[2];
116 benchmark.resize(benchmark.length() - 1);
117
118 double val;
119 if (!base::StringToDouble(cols[3], &val)) {
120 continue;
121 }
122 measurements[category][benchmark].push_back(val);
123 }
124
125 fclose(fp);
126 return measurements;
127 }
128
129 // Find the number of trials, for display.
FindNumberOfTrials(const unordered_map<string,unordered_map<string,vector<double>>> & measurements,unsigned & min_num_trials,unsigned & max_num_trials)130 void FindNumberOfTrials(
131 const unordered_map<string, unordered_map<string, vector<double>>>&
132 measurements,
133 unsigned& min_num_trials,
134 unsigned& max_num_trials) {
135 for (const auto& [category, entry] : measurements) {
136 for (const auto& [benchmark, samples] : entry) {
137 min_num_trials = min<unsigned>(min_num_trials, samples.size());
138 max_num_trials = max<unsigned>(max_num_trials, samples.size());
139 }
140 }
141 }
142
143 struct Label {
144 string benchmark;
145 size_t data_index;
146 };
147
148 } // namespace
149
main(int argc,char ** argv)150 int main(int argc, char** argv) {
151 if (argc != 3) {
152 fprintf(stderr, "USAGE: compare_blink_perf OLD_LOG NEW_LOG\n");
153 exit(1);
154 }
155
156 unordered_map<string, unordered_map<string, vector<double>>> before =
157 ReadFile(argv[1]);
158 unordered_map<string, unordered_map<string, vector<double>>> after =
159 ReadFile(argv[2]);
160
161 unsigned min_num_trials = numeric_limits<unsigned>::max();
162 unsigned max_num_trials = numeric_limits<unsigned>::min();
163 FindNumberOfTrials(before, min_num_trials, max_num_trials);
164 FindNumberOfTrials(after, min_num_trials, max_num_trials);
165 if (min_num_trials == max_num_trials) {
166 printf("%u trial(s) on each side.\n", min_num_trials);
167 } else {
168 printf("%u–%u trial(s) on each side.\n", min_num_trials, max_num_trials);
169 }
170
171 // Now pair up the data. (The estimator treats them as unpaired,
172 // but currently needs them to be of the same length within a single
173 // benchmark.) We do one run per category, so that we can get
174 // geometric means over them (RatioBootstrapEstimator doesn't support
175 // arbitrary grouping).
176 vector<string> sorted_categories;
177 for (const auto& [category, entry] : before) {
178 sorted_categories.push_back(category);
179 }
180 sort(sorted_categories.begin(), sorted_categories.end(),
181 [](const string& a, const string& b) {
182 return CodeUnitCompareIgnoringASCIICaseLessThan(a, b);
183 });
184 for (const string& category : sorted_categories) {
185 vector<Label> labels;
186 vector<vector<RatioBootstrapEstimator::Sample>> data;
187 for (const auto& [benchmark, before_samples] :
188 before.find(category)->second) {
189 const auto after_entry = after.find(category);
190 if (after_entry == after.end()) {
191 continue;
192 }
193 const auto after_samples = after_entry->second.find(benchmark);
194 if (after_samples == after_entry->second.end()) {
195 continue;
196 }
197
198 vector<RatioBootstrapEstimator::Sample> samples;
199 for (unsigned i = 0;
200 i < std::min(before_samples.size(), after_samples->second.size());
201 ++i) {
202 samples.push_back({before_samples[i], after_samples->second[i]});
203 }
204 labels.emplace_back(Label{benchmark, data.size()});
205 data.push_back(std::move(samples));
206 }
207
208 RatioBootstrapEstimator estimator(base::RandUint64());
209 const unsigned kNumResamples = 2000;
210 vector<RatioBootstrapEstimator::Estimate> estimates =
211 estimator.ComputeRatioEstimates(data, kNumResamples,
212 /*confidence_level=*/0.95,
213 /*compute_geometric_mean=*/true);
214
215 // Sort the labels for display.
216 sort(labels.begin(), labels.end(), [](const Label& a, const Label& b) {
217 return CodeUnitCompareIgnoringASCIICaseLessThan(a.benchmark, b.benchmark);
218 });
219
220 printf("\n");
221 printf("%-20s %9s %9s %7s %17s\n", category.c_str(), "Before", "After",
222 "Perf", "95% CI (BCa)");
223 printf(
224 "=================== ========= ========= ======= "
225 "=================\n");
226 for (const Label& label : labels) {
227 // RatioBootstrapEstimator doesn't give us the plain means, so compute
228 // that by hand.
229 double sum_before = 0.0, sum_after = 0.0;
230 for (const RatioBootstrapEstimator::Sample& sample :
231 data[label.data_index]) {
232 sum_before += sample.before;
233 sum_after += sample.after;
234 }
235 double mean_before = sum_before / data[label.data_index].size();
236 double mean_after = sum_after / data[label.data_index].size();
237
238 const RatioBootstrapEstimator::Estimate& estimate =
239 estimates[label.data_index];
240 printf("%-19s %9.0f %9.0f %+6.1f%% [%+5.1f%%, %+5.1f%%]\n",
241 label.benchmark.c_str(), mean_before, mean_after,
242 100.0 * (estimate.point_estimate - 1.0),
243 100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0));
244 }
245
246 const RatioBootstrapEstimator::Estimate& estimate = estimates[data.size()];
247 printf("%-19s %9s %9s %+6.1f%% [%+5.1f%%, %+5.1f%%]\n", "Geometric mean",
248 "", "", 100.0 * (estimate.point_estimate - 1.0),
249 100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0));
250 }
251 }
252