• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2024 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // A small tool to take MotionMark/Speedometer CSV files from Pinpoint
6 // and compute confidence intervals. Not intended as a general CSV reader
7 // (we don't do things like escaping and quoting).
8 //
9 // _ci refers to confidence intervals, not continuous integration.
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 
14 #include <ranges>
15 #include <string>
16 #include <unordered_map>
17 #include <utility>
18 #include <vector>
19 
20 #include "base/files/file_util.h"
21 #include "base/rand_util.h"
22 #include "base/strings/string_split.h"
23 #include "testing/perf/confidence/ratio_bootstrap_estimator.h"
24 
25 #ifdef UNSAFE_BUFFERS_BUILD
26 // Not used with untrusted inputs.
27 #pragma allow_unsafe_buffers
28 #endif
29 
30 using std::pair;
31 using std::string;
32 using std::string_view;
33 using std::unordered_map;
34 using std::vector;
35 
36 namespace {
37 
SplitCSVLine(string_view str)38 vector<string_view> SplitCSVLine(string_view str) {
39   if (str.length() > 1 && str[str.length() - 1] == '\r') {
40     str = str.substr(0, str.size() - 1);
41   }
42   return base::SplitStringPiece(str, ",", base::KEEP_WHITESPACE,
43                                 base::SPLIT_WANT_ALL);
44 }
45 
ReadCSV(const char * filename)46 vector<unordered_map<string, string>> ReadCSV(const char* filename) {
47   string contents;
48   if (!base::ReadFileToString(
49           base::FilePath::FromUTF8Unsafe(string_view(filename)), &contents)) {
50     perror(filename);
51     exit(1);
52   }
53 
54   vector<string_view> lines = base::SplitStringPiece(
55       contents, "\n", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
56   vector<string_view> headers = SplitCSVLine(lines[0]);
57   if (headers.empty()) {
58     fprintf(stderr, "%s: Empty header line!\n", filename);
59     exit(1);
60   }
61 
62   vector<unordered_map<string, string>> result;
63   for (unsigned i = 1; i < lines.size(); ++i) {
64     vector<string_view> line = SplitCSVLine(lines[i]);
65     if (line.size() != headers.size()) {
66       fprintf(stderr, "%s: Line had %zu columns, expected %zu\n", filename,
67               line.size(), headers.size());
68       break;
69     }
70 
71     unordered_map<string, string> fields;
72     for (unsigned j = 0; j < line.size(); ++j) {
73       fields.emplace(headers[j], std::move(line[j]));
74     }
75     result.push_back(std::move(fields));
76   }
77 
78   return result;
79 }
80 
81 }  // namespace
82 
main(int argc,char ** argv)83 int main(int argc, char** argv) {
84   if (argc < 2 || argc > 3) {
85     fprintf(stderr, "USAGE: pinpoint_ci CSV_FILE [CONFIDENCE_LEVEL]\n");
86     exit(1);
87   }
88 
89   // The default 0.99 matches Pinpoint.
90   double confidence_level = (argc > 2) ? atof(argv[2]) : 0.99;
91   unordered_map<string, pair<vector<double>, vector<double>>> samples;
92   bool any_is_speedometer = false;
93 
94   for (unordered_map<string, string>& line : ReadCSV(argv[1])) {
95     if (line.count("name") == 0 || line.count("displayLabel") == 0 ||
96         line.count("avg") == 0) {
97       continue;
98     }
99     const string& name = line["name"];
100     const string& display_label = line["displayLabel"];
101     double avg = atof(line["avg"].c_str());
102     bool is_motionmark =
103         name == "motionmark" || line.count("motionmarkTag") != 0;
104     bool is_speedometer =
105         name.find("TodoMVC") != string::npos ||
106         (line.count("stories") != 0 && line["stories"] == "Speedometer3");
107     if (!is_motionmark && !is_speedometer) {
108       // Not the core metrics we are looking for.
109       continue;
110     }
111     any_is_speedometer |= is_speedometer;
112     if (name.find("/") != string::npos || name.find("Lower") != string::npos ||
113         name.find("Upper") != string::npos) {
114       // More sub-metrics.
115       continue;
116     }
117     string story;
118     if (name == "motionmark") {
119       if (line.count("stories") == 0) {
120         fprintf(stderr, "WARNING: Could not find MotionMark story\n");
121         continue;
122       }
123       story = line["stories"];
124     } else {
125       story = name;
126     }
127 
128     if (display_label.find("base:") != string::npos) {
129       samples[story].first.push_back(avg);
130     } else if (display_label.find("exp:") != string::npos) {
131       samples[story].second.push_back(avg);
132     } else {
133       fprintf(stderr, "WARNING: Unknown display_label %s\n",
134               display_label.c_str());
135     }
136   }
137 
138   // Estimate the ratios for all of our data.
139   vector<vector<RatioBootstrapEstimator::Sample>> data;
140   for (const auto& [key, story_samples] : samples) {
141     // These should always be the same in Pinpoint, but just to be sure.
142     unsigned num_samples =
143         std::min(story_samples.first.size(), story_samples.second.size());
144     vector<RatioBootstrapEstimator::Sample> story_data;
145     for (unsigned i = 0; i < num_samples; ++i) {
146       story_data.push_back(RatioBootstrapEstimator::Sample{
147           story_samples.first[i], story_samples.second[i]});
148     }
149     data.push_back(std::move(story_data));
150   }
151   RatioBootstrapEstimator estimator(base::RandUint64());
152   constexpr int kNumRuns = 2000;
153   vector<RatioBootstrapEstimator::Estimate> estimates =
154       estimator.ComputeRatioEstimates(data, kNumRuns, confidence_level,
155                                       /*compute_geometric_mean=*/false);
156 
157   // Sort by name, then print. (We assume all names are ASCII.)
158   unsigned data_index = 0;
159   int max_key_len = 0;
160   vector<pair<string, RatioBootstrapEstimator::Estimate>> to_print;
161   for (const auto& [key, story_samples] : samples) {
162     to_print.emplace_back(key, std::move(estimates[data_index]));
163     ++data_index;
164     max_key_len = std::max<int>(max_key_len, key.length());
165   }
166   std::ranges::sort(
167       to_print, [](const pair<string, RatioBootstrapEstimator::Estimate>& a,
168                    const pair<string, RatioBootstrapEstimator::Estimate>& b) {
169         return a.first < b.first;
170       });
171   for (const auto& [key, estimate] : to_print) {
172     // Convert from ratios to percent change. For Pinpoint, higher-is-better,
173     // so we also need to convert from before/after to after/before.
174     double lower = 100.0 * (1.0 / estimate.upper - 1.0);
175     double upper = 100.0 * (1.0 / estimate.lower - 1.0);
176 
177     // If our confidence interval doesn't touch 100%, we know (at the given
178     // confidence level) that there is a real change. It might be a bit
179     // confusing when an interval with -0.0% or +0.0% is shown as significant
180     // (due to rounding), but this is probably confusing no matter what we do.
181     const char* emoji = "  ";
182     if (lower > 0.0 && upper > 0.0) {
183       if (any_is_speedometer && key != "Score") {
184         // For Speedometer, lower is better (except for Score),
185         // so adjust the thumbs accordingly. We could flip the values, too,
186         // for ease of understanding, but be consistent with Pinpoint.
187         emoji = "��";
188       } else {
189         emoji = "��";
190       }
191     } else if (lower < -0.0 && upper < -0.0) {
192       if (any_is_speedometer && key != "Score") {
193         emoji = "��";
194       } else {
195         emoji = "��";
196       }
197     }
198 
199     printf("%s %-*s  [%+5.1f%%, %+5.1f%%]\n", emoji, max_key_len, key.c_str(),
200            lower, upper);
201   }
202 }
203