// Copyright 2024 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // A utility to compare two runs of a style perftest, in the perftest // output format, and compute confidence intervals. The simplest way // to get the right information is to build a binary before and after // changes and then run it continuously every-other for a while until // you feel you have enough data: // // rm -f old.txt new.txt; \ // while :; do // taskset -c 2,4,6,8 ./out/Release/blink_perf___old \ // --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a old.txt; // taskset -c 2,4,6,8 ./out/Release/blink_perf_tests \ // --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a new.txt; // done // // and then run ./out/Release/compare_blink_perf old.txt new.txt. // (Possibly under watch -n 1 if you want to look at data as it // comes in, though beware of p-hacking.) // // TODO(sesse): Consider whether we should remove the first few // runs, as they are frequently outliers. #include #include #include #include #include #include #include #include "base/rand_util.h" #include "base/strings/string_number_conversions.h" #include "testing/perf/confidence/ratio_bootstrap_estimator.h" #ifdef UNSAFE_BUFFERS_BUILD // Not used with untrusted inputs. #pragma allow_unsafe_buffers #endif using std::max; using std::min; using std::numeric_limits; using std::pair; using std::sort; using std::string; using std::unordered_map; using std::vector; namespace { string BeautifyCategory(const string& category) { if (category == "BlinkStyleInitialCalcTime") { return "Initial style (µs)"; } else if (category == "BlinkStyleRecalcTime") { return "Recalc style (µs)"; } else if (category == "BlinkStyleParseTime") { return "Parse (µs)"; } else { return category; } } bool CodeUnitCompareIgnoringASCIICaseLessThan(const string& a, const string& b) { return lexicographical_compare( a.begin(), a.end(), b.begin(), b.end(), [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); }); } // The structure is e.g. BlinkStyleParseTime -> Video -> [100 us, 90 us, ...] unordered_map>> ReadFile( const char* filename) { unordered_map>> measurements; FILE* fp = fopen(filename, "r"); if (fp == nullptr) { perror(filename); exit(1); } while (!feof(fp)) { char buf[4096]; if (fgets(buf, sizeof(buf), fp) == nullptr) { break; } string str(buf); if (str.length() > 1 && str[str.length() - 1] == '\n') { str.resize(str.length() - 1); } if (str.length() > 1 && str[str.length() - 1] == '\r') { str.resize(str.length() - 1); } // A result line looks like: *RESULT BlinkStyleParseTime: Video= 11061 us vector cols{""}; for (char ch : str) { if (ch == ' ') { cols.push_back(""); } else { cols.back().push_back(ch); } } if (cols.size() != 5 || cols[0] != "*RESULT" || !cols[1].ends_with(":") || !cols[2].ends_with("=") || cols[4] != "us") { continue; } string category = cols[1]; category.resize(category.length() - 1); category = BeautifyCategory(category); string benchmark = cols[2]; benchmark.resize(benchmark.length() - 1); double val; if (!base::StringToDouble(cols[3], &val)) { continue; } measurements[category][benchmark].push_back(val); } fclose(fp); return measurements; } // Find the number of trials, for display. void FindNumberOfTrials( const unordered_map>>& measurements, unsigned& min_num_trials, unsigned& max_num_trials) { for (const auto& [category, entry] : measurements) { for (const auto& [benchmark, samples] : entry) { min_num_trials = min(min_num_trials, samples.size()); max_num_trials = max(max_num_trials, samples.size()); } } } struct Label { string benchmark; size_t data_index; }; } // namespace int main(int argc, char** argv) { if (argc != 3) { fprintf(stderr, "USAGE: compare_blink_perf OLD_LOG NEW_LOG\n"); exit(1); } unordered_map>> before = ReadFile(argv[1]); unordered_map>> after = ReadFile(argv[2]); unsigned min_num_trials = numeric_limits::max(); unsigned max_num_trials = numeric_limits::min(); FindNumberOfTrials(before, min_num_trials, max_num_trials); FindNumberOfTrials(after, min_num_trials, max_num_trials); if (min_num_trials == max_num_trials) { printf("%u trial(s) on each side.\n", min_num_trials); } else { printf("%u–%u trial(s) on each side.\n", min_num_trials, max_num_trials); } // Now pair up the data. (The estimator treats them as unpaired, // but currently needs them to be of the same length within a single // benchmark.) We do one run per category, so that we can get // geometric means over them (RatioBootstrapEstimator doesn't support // arbitrary grouping). vector sorted_categories; for (const auto& [category, entry] : before) { sorted_categories.push_back(category); } sort(sorted_categories.begin(), sorted_categories.end(), [](const string& a, const string& b) { return CodeUnitCompareIgnoringASCIICaseLessThan(a, b); }); for (const string& category : sorted_categories) { vector