• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef BENCHMARK_PERF_COUNTERS_H
16 #define BENCHMARK_PERF_COUNTERS_H
17 
18 #include <array>
19 #include <cstdint>
20 #include <cstring>
21 #include <memory>
22 #include <vector>
23 
24 #include "benchmark/benchmark.h"
25 #include "check.h"
26 #include "log.h"
27 #include "mutex.h"
28 
29 #ifndef BENCHMARK_OS_WINDOWS
30 #include <unistd.h>
31 #endif
32 
33 #if defined(_MSC_VER)
34 #pragma warning(push)
35 // C4251: <symbol> needs to have dll-interface to be used by clients of class
36 #pragma warning(disable : 4251)
37 #endif
38 
39 namespace benchmark {
40 namespace internal {
41 
42 // Typically, we can only read a small number of counters. There is also a
43 // padding preceding counter values, when reading multiple counters with one
44 // syscall (which is desirable). PerfCounterValues abstracts these details.
45 // The implementation ensures the storage is inlined, and allows 0-based
46 // indexing into the counter values.
47 // The object is used in conjunction with a PerfCounters object, by passing it
48 // to Snapshot(). The Read() method relocates individual reads, discarding
49 // the initial padding from each group leader in the values buffer such that
50 // all user accesses through the [] operator are correct.
51 class BENCHMARK_EXPORT PerfCounterValues {
52  public:
PerfCounterValues(size_t nr_counters)53   explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
54     BM_CHECK_LE(nr_counters_, kMaxCounters);
55   }
56 
57   // We are reading correctly now so the values don't need to skip padding
58   uint64_t operator[](size_t pos) const { return values_[pos]; }
59 
60   // Increased the maximum to 32 only since the buffer
61   // is std::array<> backed
62   static constexpr size_t kMaxCounters = 32;
63 
64  private:
65   friend class PerfCounters;
66   // Get the byte buffer in which perf counters can be captured.
67   // This is used by PerfCounters::Read
get_data_buffer()68   std::pair<char*, size_t> get_data_buffer() {
69     return {reinterpret_cast<char*>(values_.data()),
70             sizeof(uint64_t) * (kPadding + nr_counters_)};
71   }
72 
73   // This reading is complex and as the goal of this class is to
74   // abstract away the intrincacies of the reading process, this is
75   // a better place for it
76   size_t Read(const std::vector<int>& leaders);
77 
78   // Move the padding to 2 due to the reading algorithm (1st padding plus a
79   // current read padding)
80   static constexpr size_t kPadding = 2;
81   std::array<uint64_t, kPadding + kMaxCounters> values_;
82   const size_t nr_counters_;
83 };
84 
85 // Collect PMU counters. The object, once constructed, is ready to be used by
86 // calling read(). PMU counter collection is enabled from the time create() is
87 // called, to obtain the object, until the object's destructor is called.
88 class BENCHMARK_EXPORT PerfCounters final {
89  public:
90   // True iff this platform supports performance counters.
91   static const bool kSupported;
92 
93   // Returns an empty object
NoCounters()94   static PerfCounters NoCounters() { return PerfCounters(); }
95 
~PerfCounters()96   ~PerfCounters() { CloseCounters(); }
97   PerfCounters() = default;
98   PerfCounters(PerfCounters&&) = default;
99   PerfCounters(const PerfCounters&) = delete;
100   PerfCounters& operator=(PerfCounters&&) noexcept;
101   PerfCounters& operator=(const PerfCounters&) = delete;
102 
103   // Platform-specific implementations may choose to do some library
104   // initialization here.
105   static bool Initialize();
106 
107   // Check if the given counter is supported, if the app wants to
108   // check before passing
109   static bool IsCounterSupported(const std::string& name);
110 
111   // Return a PerfCounters object ready to read the counters with the names
112   // specified. The values are user-mode only. The counter name format is
113   // implementation and OS specific.
114   // In case of failure, this method will in the worst case return an
115   // empty object whose state will still be valid.
116   static PerfCounters Create(const std::vector<std::string>& counter_names);
117 
118   // Take a snapshot of the current value of the counters into the provided
119   // valid PerfCounterValues storage. The values are populated such that:
120   // names()[i]'s value is (*values)[i]
Snapshot(PerfCounterValues * values)121   BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
122 #ifndef BENCHMARK_OS_WINDOWS
123     assert(values != nullptr);
124     return values->Read(leader_ids_) == counter_ids_.size();
125 #else
126     (void)values;
127     return false;
128 #endif
129   }
130 
names()131   const std::vector<std::string>& names() const { return counter_names_; }
num_counters()132   size_t num_counters() const { return counter_names_.size(); }
133 
134  private:
PerfCounters(const std::vector<std::string> & counter_names,std::vector<int> && counter_ids,std::vector<int> && leader_ids)135   PerfCounters(const std::vector<std::string>& counter_names,
136                std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
137       : counter_ids_(std::move(counter_ids)),
138         leader_ids_(std::move(leader_ids)),
139         counter_names_(counter_names) {}
140 
141   void CloseCounters() const;
142 
143   std::vector<int> counter_ids_;
144   std::vector<int> leader_ids_;
145   std::vector<std::string> counter_names_;
146 };
147 
148 // Typical usage of the above primitives.
149 class BENCHMARK_EXPORT PerfCountersMeasurement final {
150  public:
151   PerfCountersMeasurement(const std::vector<std::string>& counter_names);
152 
num_counters()153   size_t num_counters() const { return counters_.num_counters(); }
154 
names()155   std::vector<std::string> names() const { return counters_.names(); }
156 
Start()157   BENCHMARK_ALWAYS_INLINE bool Start() {
158     if (num_counters() == 0) return true;
159     // Tell the compiler to not move instructions above/below where we take
160     // the snapshot.
161     ClobberMemory();
162     valid_read_ &= counters_.Snapshot(&start_values_);
163     ClobberMemory();
164 
165     return valid_read_;
166   }
167 
Stop(std::vector<std::pair<std::string,double>> & measurements)168   BENCHMARK_ALWAYS_INLINE bool Stop(
169       std::vector<std::pair<std::string, double>>& measurements) {
170     if (num_counters() == 0) return true;
171     // Tell the compiler to not move instructions above/below where we take
172     // the snapshot.
173     ClobberMemory();
174     valid_read_ &= counters_.Snapshot(&end_values_);
175     ClobberMemory();
176 
177     for (size_t i = 0; i < counters_.names().size(); ++i) {
178       double measurement = static_cast<double>(end_values_[i]) -
179                            static_cast<double>(start_values_[i]);
180       measurements.push_back({counters_.names()[i], measurement});
181     }
182 
183     return valid_read_;
184   }
185 
186  private:
187   PerfCounters counters_;
188   bool valid_read_ = true;
189   PerfCounterValues start_values_;
190   PerfCounterValues end_values_;
191 };
192 
193 }  // namespace internal
194 }  // namespace benchmark
195 
196 #if defined(_MSC_VER)
197 #pragma warning(pop)
198 #endif
199 
200 #endif  // BENCHMARK_PERF_COUNTERS_H
201