• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2024 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // A utility to compare two runs of a style perftest, in the perftest
6 // output format, and compute confidence intervals. The simplest way
7 // to get the right information is to build a binary before and after
8 // changes and then run it continuously every-other for a while until
9 // you feel you have enough data:
10 //
11 //   rm -f old.txt new.txt; \
12 //   while :; do
13 //     taskset -c 2,4,6,8 ./out/Release/blink_perf___old \
14 //       --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a old.txt;
15 //     taskset -c 2,4,6,8 ./out/Release/blink_perf_tests \
16 //       --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a new.txt;
17 //   done
18 //
19 // and then run ./out/Release/compare_blink_perf old.txt new.txt.
20 // (Possibly under watch -n 1 if you want to look at data as it
21 // comes in, though beware of p-hacking.)
22 //
23 // TODO(sesse): Consider whether we should remove the first few
24 // runs, as they are frequently outliers.
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 
29 #include <algorithm>
30 #include <string>
31 #include <unordered_map>
32 #include <utility>
33 #include <vector>
34 
35 #include "base/rand_util.h"
36 #include "base/strings/string_number_conversions.h"
37 #include "testing/perf/confidence/ratio_bootstrap_estimator.h"
38 
39 #ifdef UNSAFE_BUFFERS_BUILD
40 // Not used with untrusted inputs.
41 #pragma allow_unsafe_buffers
42 #endif
43 
44 using std::max;
45 using std::min;
46 using std::numeric_limits;
47 using std::pair;
48 using std::sort;
49 using std::string;
50 using std::unordered_map;
51 using std::vector;
52 
53 namespace {
54 
BeautifyCategory(const string & category)55 string BeautifyCategory(const string& category) {
56   if (category == "BlinkStyleInitialCalcTime") {
57     return "Initial style (µs)";
58   } else if (category == "BlinkStyleRecalcTime") {
59     return "Recalc style (µs)";
60   } else if (category == "BlinkStyleParseTime") {
61     return "Parse (µs)";
62   } else {
63     return category;
64   }
65 }
66 
CodeUnitCompareIgnoringASCIICaseLessThan(const string & a,const string & b)67 bool CodeUnitCompareIgnoringASCIICaseLessThan(const string& a,
68                                               const string& b) {
69   return lexicographical_compare(
70       a.begin(), a.end(), b.begin(), b.end(),
71       [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); });
72 }
73 
74 // The structure is e.g. BlinkStyleParseTime -> Video -> [100 us, 90 us, ...]
ReadFile(const char * filename)75 unordered_map<string, unordered_map<string, vector<double>>> ReadFile(
76     const char* filename) {
77   unordered_map<string, unordered_map<string, vector<double>>> measurements;
78 
79   FILE* fp = fopen(filename, "r");
80   if (fp == nullptr) {
81     perror(filename);
82     exit(1);
83   }
84   while (!feof(fp)) {
85     char buf[4096];
86     if (fgets(buf, sizeof(buf), fp) == nullptr) {
87       break;
88     }
89     string str(buf);
90     if (str.length() > 1 && str[str.length() - 1] == '\n') {
91       str.resize(str.length() - 1);
92     }
93     if (str.length() > 1 && str[str.length() - 1] == '\r') {
94       str.resize(str.length() - 1);
95     }
96 
97     // A result line looks like: *RESULT BlinkStyleParseTime: Video= 11061 us
98     vector<string> cols{""};
99     for (char ch : str) {
100       if (ch == ' ') {
101         cols.push_back("");
102       } else {
103         cols.back().push_back(ch);
104       }
105     }
106     if (cols.size() != 5 || cols[0] != "*RESULT" || !cols[1].ends_with(":") ||
107         !cols[2].ends_with("=") || cols[4] != "us") {
108       continue;
109     }
110 
111     string category = cols[1];
112     category.resize(category.length() - 1);
113     category = BeautifyCategory(category);
114 
115     string benchmark = cols[2];
116     benchmark.resize(benchmark.length() - 1);
117 
118     double val;
119     if (!base::StringToDouble(cols[3], &val)) {
120       continue;
121     }
122     measurements[category][benchmark].push_back(val);
123   }
124 
125   fclose(fp);
126   return measurements;
127 }
128 
129 // Find the number of trials, for display.
FindNumberOfTrials(const unordered_map<string,unordered_map<string,vector<double>>> & measurements,unsigned & min_num_trials,unsigned & max_num_trials)130 void FindNumberOfTrials(
131     const unordered_map<string, unordered_map<string, vector<double>>>&
132         measurements,
133     unsigned& min_num_trials,
134     unsigned& max_num_trials) {
135   for (const auto& [category, entry] : measurements) {
136     for (const auto& [benchmark, samples] : entry) {
137       min_num_trials = min<unsigned>(min_num_trials, samples.size());
138       max_num_trials = max<unsigned>(max_num_trials, samples.size());
139     }
140   }
141 }
142 
143 struct Label {
144   string benchmark;
145   size_t data_index;
146 };
147 
148 }  // namespace
149 
main(int argc,char ** argv)150 int main(int argc, char** argv) {
151   if (argc != 3) {
152     fprintf(stderr, "USAGE: compare_blink_perf OLD_LOG NEW_LOG\n");
153     exit(1);
154   }
155 
156   unordered_map<string, unordered_map<string, vector<double>>> before =
157       ReadFile(argv[1]);
158   unordered_map<string, unordered_map<string, vector<double>>> after =
159       ReadFile(argv[2]);
160 
161   unsigned min_num_trials = numeric_limits<unsigned>::max();
162   unsigned max_num_trials = numeric_limits<unsigned>::min();
163   FindNumberOfTrials(before, min_num_trials, max_num_trials);
164   FindNumberOfTrials(after, min_num_trials, max_num_trials);
165   if (min_num_trials == max_num_trials) {
166     printf("%u trial(s) on each side.\n", min_num_trials);
167   } else {
168     printf("%u–%u trial(s) on each side.\n", min_num_trials, max_num_trials);
169   }
170 
171   // Now pair up the data. (The estimator treats them as unpaired,
172   // but currently needs them to be of the same length within a single
173   // benchmark.) We do one run per category, so that we can get
174   // geometric means over them (RatioBootstrapEstimator doesn't support
175   // arbitrary grouping).
176   vector<string> sorted_categories;
177   for (const auto& [category, entry] : before) {
178     sorted_categories.push_back(category);
179   }
180   sort(sorted_categories.begin(), sorted_categories.end(),
181        [](const string& a, const string& b) {
182          return CodeUnitCompareIgnoringASCIICaseLessThan(a, b);
183        });
184   for (const string& category : sorted_categories) {
185     vector<Label> labels;
186     vector<vector<RatioBootstrapEstimator::Sample>> data;
187     for (const auto& [benchmark, before_samples] :
188          before.find(category)->second) {
189       const auto after_entry = after.find(category);
190       if (after_entry == after.end()) {
191         continue;
192       }
193       const auto after_samples = after_entry->second.find(benchmark);
194       if (after_samples == after_entry->second.end()) {
195         continue;
196       }
197 
198       vector<RatioBootstrapEstimator::Sample> samples;
199       for (unsigned i = 0;
200            i < std::min(before_samples.size(), after_samples->second.size());
201            ++i) {
202         samples.push_back({before_samples[i], after_samples->second[i]});
203       }
204       labels.emplace_back(Label{benchmark, data.size()});
205       data.push_back(std::move(samples));
206     }
207 
208     RatioBootstrapEstimator estimator(base::RandUint64());
209     const unsigned kNumResamples = 2000;
210     vector<RatioBootstrapEstimator::Estimate> estimates =
211         estimator.ComputeRatioEstimates(data, kNumResamples,
212                                         /*confidence_level=*/0.95,
213                                         /*compute_geometric_mean=*/true);
214 
215     // Sort the labels for display.
216     sort(labels.begin(), labels.end(), [](const Label& a, const Label& b) {
217       return CodeUnitCompareIgnoringASCIICaseLessThan(a.benchmark, b.benchmark);
218     });
219 
220     printf("\n");
221     printf("%-20s %9s %9s %7s %17s\n", category.c_str(), "Before", "After",
222            "Perf", "95% CI (BCa)");
223     printf(
224         "=================== ========= ========= ======= "
225         "=================\n");
226     for (const Label& label : labels) {
227       // RatioBootstrapEstimator doesn't give us the plain means, so compute
228       // that by hand.
229       double sum_before = 0.0, sum_after = 0.0;
230       for (const RatioBootstrapEstimator::Sample& sample :
231            data[label.data_index]) {
232         sum_before += sample.before;
233         sum_after += sample.after;
234       }
235       double mean_before = sum_before / data[label.data_index].size();
236       double mean_after = sum_after / data[label.data_index].size();
237 
238       const RatioBootstrapEstimator::Estimate& estimate =
239           estimates[label.data_index];
240       printf("%-19s %9.0f %9.0f %+6.1f%%  [%+5.1f%%, %+5.1f%%]\n",
241              label.benchmark.c_str(), mean_before, mean_after,
242              100.0 * (estimate.point_estimate - 1.0),
243              100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0));
244     }
245 
246     const RatioBootstrapEstimator::Estimate& estimate = estimates[data.size()];
247     printf("%-19s %9s %9s %+6.1f%%  [%+5.1f%%, %+5.1f%%]\n", "Geometric mean",
248            "", "", 100.0 * (estimate.point_estimate - 1.0),
249            100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0));
250   }
251 }
252