• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <random>
2 #include <thread>
3 
4 #include "../src/perf_counters.h"
5 #include "gtest/gtest.h"
6 
#ifndef GTEST_SKIP
// Fallback used only when the test framework does not define GTEST_SKIP
// (presumably an older googletest release). The macro expands to a `return`
// statement, so it is only usable inside functions returning void. Because
// `<<` binds tighter than `=`, the streamed message is written to std::cout
// first; MsgHandler then swallows the resulting stream so that the returned
// expression has type void.
struct MsgHandler {
  void operator=(std::ostream& /*sink*/) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif
13 
14 using benchmark::internal::PerfCounters;
15 using benchmark::internal::PerfCountersMeasurement;
16 using benchmark::internal::PerfCounterValues;
17 
18 namespace {
// Counter names handed to PerfCounters::Create() by the tests in this file.
constexpr char kGenericPerfEvent1[] = "CYCLES";
constexpr char kGenericPerfEvent2[] = "BRANCHES";
constexpr char kGenericPerfEvent3[] = "INSTRUCTIONS";
22 
// Initialize() is expected to return the same value as
// PerfCounters::kSupported.
TEST(PerfCountersTest, Init) {
  const auto initialized = PerfCounters::Initialize();
  EXPECT_EQ(initialized, PerfCounters::kSupported);
}
26 
// Creating a PerfCounters object from a single valid event name must yield
// exactly one counter.
TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(counters.num_counters(), 1);
}
34 
// Verifies that Create() tolerates bad input: the checks below require empty
// or unknown counter names to be dropped while the valid names are kept in
// the order in which they were passed.
TEST(PerfCountersTest, NegativeTest) {
  using NameList = std::vector<std::string>;

  if (!PerfCounters::kSupported) {
    // Without perf-counter support Initialize() is expected to fail, and
    // there is nothing else to verify.
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Sanity checks: Create() always returns a valid object, even when given
  // no names or wrong names, because the current behavior is to warn about
  // unsupported counters and drop them.
  EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);

  {
    // An empty name hidden between two valid ones must be filtered out,
    // leaving two counters rather than zero.
    const NameList expected{kGenericPerfEvent2, kGenericPerfEvent1};
    auto counter =
        PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), expected);
  }

  {
    // Same idea with an outrageous name, as a fat-finger mistake would
    // produce.
    const NameList expected{kGenericPerfEvent3, kGenericPerfEvent1};
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent3, "not a counter name", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), expected);
  }

  {
    // Golden input: every name is valid, so all of them should be accepted.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3});
    EXPECT_EQ(counter.num_counters(), 3);
  }

  {
    // A bad name at the very end of the list exercises the edge of the
    // filtering logic.
    const NameList expected{kGenericPerfEvent1, kGenericPerfEvent2,
                            kGenericPerfEvent3};
    auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                                         kGenericPerfEvent3,
                                         "MISPREDICTED_BRANCH_RETIRED"});
    EXPECT_EQ(counter.num_counters(), 3);
    EXPECT_EQ(counter.names(), expected);
  }
}
83 
// Takes two consecutive snapshots of a single counter and checks that both
// readings are positive and that the second is larger than the first.
TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(counters.num_counters(), 1);

  PerfCounterValues first_reading(1);
  EXPECT_TRUE(counters.Snapshot(&first_reading));
  EXPECT_GT(first_reading[0], 0);

  PerfCounterValues second_reading(1);
  EXPECT_TRUE(counters.Snapshot(&second_reading));
  EXPECT_GT(second_reading[0], 0);

  // The counter keeps advancing between the two snapshots.
  EXPECT_GT(second_reading[0], first_reading[0]);
}
99 
// Takes two snapshots of a pair of counters and checks that every reading is
// positive.
TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_EQ(counters.num_counters(), 2);

  PerfCounterValues first_reading(2);
  EXPECT_TRUE(counters.Snapshot(&first_reading));
  EXPECT_GT(first_reading[0], 0);
  EXPECT_GT(first_reading[1], 0);

  PerfCounterValues second_reading(2);
  EXPECT_TRUE(counters.Snapshot(&second_reading));
  EXPECT_GT(second_reading[0], 0);
  EXPECT_GT(second_reading[1], 0);
}
117 
// Opens the same event three times and checks that every instance can be
// read.
TEST(PerfCountersTest, ReopenExistingCounters) {
  // This test works on recent and old Intel hardware. However, we cannot
  // make assumptions beyond 3 hardware counters.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  std::vector<std::string> kMetrics({kGenericPerfEvent1});

  // Default-construct three slots, then replace each one with a freshly
  // created counter set for the same metric.
  std::vector<PerfCounters> counters(3);
  for (size_t slot = 0; slot < counters.size(); ++slot) {
    counters[slot] = PerfCounters::Create(kMetrics);
  }

  PerfCounterValues values(1);
  for (auto& counter : counters) {
    EXPECT_TRUE(counter.Snapshot(&values));
  }
}
135 
TEST(PerfCountersTest,CreateExistingMeasurements)136 TEST(PerfCountersTest, CreateExistingMeasurements) {
137   // The test works (i.e. causes read to fail) for the assumptions
138   // about hardware capabilities (i.e. small number (3) hardware
139   // counters) at this date,
140   // the same as previous test ReopenExistingCounters.
141   if (!PerfCounters::kSupported) {
142     GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
143   }
144   EXPECT_TRUE(PerfCounters::Initialize());
145 
146   // This means we will try 10 counters but we can only guarantee
147   // for sure at this time that only 3 will work. Perhaps in the future
148   // we could use libpfm to query for the hardware limits on this
149   // particular platform.
150   const int kMaxCounters = 10;
151   const int kMinValidCounters = 3;
152 
153   // Let's use a ubiquitous counter that is guaranteed to work
154   // on all platforms
155   const std::vector<std::string> kMetrics{"cycles"};
156 
157   // Cannot create a vector of actual objects because the
158   // copy constructor of PerfCounters is deleted - and so is
159   // implicitly deleted on PerfCountersMeasurement too
160   std::vector<std::unique_ptr<PerfCountersMeasurement>>
161       perf_counter_measurements;
162 
163   perf_counter_measurements.reserve(kMaxCounters);
164   for (int j = 0; j < kMaxCounters; ++j) {
165     perf_counter_measurements.emplace_back(
166         new PerfCountersMeasurement(kMetrics));
167   }
168 
169   std::vector<std::pair<std::string, double>> measurements;
170 
171   // Start all counters together to see if they hold
172   int max_counters = kMaxCounters;
173   for (int i = 0; i < kMaxCounters; ++i) {
174     auto& counter(*perf_counter_measurements[i]);
175     EXPECT_EQ(counter.num_counters(), 1);
176     if (!counter.Start()) {
177       max_counters = i;
178       break;
179     };
180   }
181 
182   ASSERT_GE(max_counters, kMinValidCounters);
183 
184   // Start all together
185   for (int i = 0; i < max_counters; ++i) {
186     auto& counter(*perf_counter_measurements[i]);
187     EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
188   }
189 
190   // Start/stop individually
191   for (int i = 0; i < max_counters; ++i) {
192     auto& counter(*perf_counter_measurements[i]);
193     measurements.clear();
194     counter.Start();
195     EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
196   }
197 }
198 
199 // We try to do some meaningful work here but the compiler
200 // insists in optimizing away our loop so we had to add a
201 // no-optimize macro. In case it fails, we added some entropy
202 // to this pool as well.
203 
do_work()204 BENCHMARK_DONT_OPTIMIZE size_t do_work() {
205   static std::mt19937 rd{std::random_device{}()};
206   static std::uniform_int_distribution<size_t> mrand(0, 10);
207   const size_t kNumLoops = 1000000;
208   size_t sum = 0;
209   for (size_t j = 0; j < kNumLoops; ++j) {
210     sum += mrand(rd);
211   }
212   benchmark::DoNotOptimize(sum);
213   return sum;
214 }
215 
measure(size_t threadcount,PerfCounterValues * before,PerfCounterValues * after)216 void measure(size_t threadcount, PerfCounterValues* before,
217              PerfCounterValues* after) {
218   BM_CHECK_NE(before, nullptr);
219   BM_CHECK_NE(after, nullptr);
220   std::vector<std::thread> threads(threadcount);
221   auto work = [&]() { BM_CHECK(do_work() > 1000); };
222 
223   // We need to first set up the counters, then start the threads, so the
224   // threads would inherit the counters. But later, we need to first destroy
225   // the thread pool (so all the work finishes), then measure the counters. So
226   // the scopes overlap, and we need to explicitly control the scope of the
227   // threadpool.
228   auto counters =
229       PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3});
230   for (auto& t : threads) t = std::thread(work);
231   counters.Snapshot(before);
232   for (auto& t : threads) t.join();
233   counters.Snapshot(after);
234 }
235 
// Checks that the counters account for work done on worker threads: running
// four threads must cost at least 1.9 times the cycles and instructions of
// running two.
TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  PerfCounterValues before(2);
  PerfCounterValues after(2);

  // Note that this test works even when it is pinned (taskset) to a single
  // CPU; in that case the threads simply run sequentially.

  // Two threads: combined cycles (index 0) and instructions (index 1).
  measure(2, &before, &after);
  const double cycles_2_threads = static_cast<double>(after[0] - before[0]);
  const double instructions_2_threads =
      static_cast<double>(after[1] - before[1]);

  // Four threads: same two quantities.
  measure(4, &before, &after);
  const double cycles_4_threads = static_cast<double>(after[0] - before[0]);
  const double instructions_4_threads =
      static_cast<double>(after[1] - before[1]);

  // Some extra work happens on the main thread - such as joining the
  // threads - so the ratio won't be exactly 2.0, but very close to it.
  EXPECT_GE(cycles_4_threads, 1.9 * cycles_2_threads);
  EXPECT_GE(instructions_4_threads, 1.9 * instructions_2_threads);
}
265 
// Requests a long list of hardware events in one PerfCountersMeasurement and
// checks that a start/stop cycle still succeeds.
TEST(PerfCountersTest, HardwareLimits) {
  // The test works (i.e. causes read to fail) under today's assumptions about
  // hardware capabilities (a small number (3-4) of hardware counters), the
  // same assumptions as the earlier test ReopenExistingCounters.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Taken straight from `perf list` on x86-64. All hardware event names are
  // included, since these are the problematic ones.
  const std::vector<std::string> candidate_names{
      "cycles",  // leader
      "instructions",
      "branches",
      "L1-dcache-loads",
      "L1-dcache-load-misses",
      "L1-dcache-prefetches",
      "L1-icache-load-misses",  // leader
      "L1-icache-loads",
      "branch-load-misses",
      "branch-loads",
      "dTLB-load-misses",
      "dTLB-loads",
      "iTLB-load-misses",  // leader
      "iTLB-loads",
      "branch-instructions",
      "branch-misses",
      "cache-misses",
      "cache-references",
      "stalled-cycles-backend",  // leader
      "stalled-cycles-frontend"};

  // On the off chance that some of these events are not supported, filter
  // them out so the test completes without failure, although it might then
  // not actually exercise the grouping on that platform.
  std::vector<std::string> supported_names;
  for (const auto& candidate : candidate_names) {
    if (!PerfCounters::IsCounterSupported(candidate)) {
      continue;
    }
    supported_names.push_back(candidate);
  }

  PerfCountersMeasurement measurement(supported_names);
  std::vector<std::pair<std::string, double>> results;

  measurement.Start();
  EXPECT_TRUE(measurement.Stop(results));
}
315 
316 }  // namespace
317