1 // Copyright 2021 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef BENCHMARK_PERF_COUNTERS_H 16 #define BENCHMARK_PERF_COUNTERS_H 17 18 #include <array> 19 #include <cstdint> 20 #include <cstring> 21 #include <memory> 22 #include <vector> 23 24 #include "benchmark/benchmark.h" 25 #include "check.h" 26 #include "log.h" 27 #include "mutex.h" 28 29 #ifndef BENCHMARK_OS_WINDOWS 30 #include <unistd.h> 31 #endif 32 33 #if defined(_MSC_VER) 34 #pragma warning(push) 35 // C4251: <symbol> needs to have dll-interface to be used by clients of class 36 #pragma warning(disable : 4251) 37 #endif 38 39 namespace benchmark { 40 namespace internal { 41 42 // Typically, we can only read a small number of counters. There is also a 43 // padding preceding counter values, when reading multiple counters with one 44 // syscall (which is desirable). PerfCounterValues abstracts these details. 45 // The implementation ensures the storage is inlined, and allows 0-based 46 // indexing into the counter values. 47 // The object is used in conjunction with a PerfCounters object, by passing it 48 // to Snapshot(). The Read() method relocates individual reads, discarding 49 // the initial padding from each group leader in the values buffer such that 50 // all user accesses through the [] operator are correct. 51 class BENCHMARK_EXPORT PerfCounterValues { 52 public: PerfCounterValues(size_t nr_counters)53 explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { 54 BM_CHECK_LE(nr_counters_, kMaxCounters); 55 } 56 57 // We are reading correctly now so the values don't need to skip padding 58 uint64_t operator[](size_t pos) const { return values_[pos]; } 59 60 // Increased the maximum to 32 only since the buffer 61 // is std::array<> backed 62 static constexpr size_t kMaxCounters = 32; 63 64 private: 65 friend class PerfCounters; 66 // Get the byte buffer in which perf counters can be captured. 67 // This is used by PerfCounters::Read get_data_buffer()68 std::pair<char*, size_t> get_data_buffer() { 69 return {reinterpret_cast<char*>(values_.data()), 70 sizeof(uint64_t) * (kPadding + nr_counters_)}; 71 } 72 73 // This reading is complex and as the goal of this class is to 74 // abstract away the intrincacies of the reading process, this is 75 // a better place for it 76 size_t Read(const std::vector<int>& leaders); 77 78 // Move the padding to 2 due to the reading algorithm (1st padding plus a 79 // current read padding) 80 static constexpr size_t kPadding = 2; 81 std::array<uint64_t, kPadding + kMaxCounters> values_; 82 const size_t nr_counters_; 83 }; 84 85 // Collect PMU counters. The object, once constructed, is ready to be used by 86 // calling read(). PMU counter collection is enabled from the time create() is 87 // called, to obtain the object, until the object's destructor is called. 88 class BENCHMARK_EXPORT PerfCounters final { 89 public: 90 // True iff this platform supports performance counters. 91 static const bool kSupported; 92 93 // Returns an empty object NoCounters()94 static PerfCounters NoCounters() { return PerfCounters(); } 95 ~PerfCounters()96 ~PerfCounters() { CloseCounters(); } 97 PerfCounters() = default; 98 PerfCounters(PerfCounters&&) = default; 99 PerfCounters(const PerfCounters&) = delete; 100 PerfCounters& operator=(PerfCounters&&) noexcept; 101 PerfCounters& operator=(const PerfCounters&) = delete; 102 103 // Platform-specific implementations may choose to do some library 104 // initialization here. 105 static bool Initialize(); 106 107 // Check if the given counter is supported, if the app wants to 108 // check before passing 109 static bool IsCounterSupported(const std::string& name); 110 111 // Return a PerfCounters object ready to read the counters with the names 112 // specified. The values are user-mode only. The counter name format is 113 // implementation and OS specific. 114 // In case of failure, this method will in the worst case return an 115 // empty object whose state will still be valid. 116 static PerfCounters Create(const std::vector<std::string>& counter_names); 117 118 // Take a snapshot of the current value of the counters into the provided 119 // valid PerfCounterValues storage. The values are populated such that: 120 // names()[i]'s value is (*values)[i] Snapshot(PerfCounterValues * values)121 BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { 122 #ifndef BENCHMARK_OS_WINDOWS 123 assert(values != nullptr); 124 return values->Read(leader_ids_) == counter_ids_.size(); 125 #else 126 (void)values; 127 return false; 128 #endif 129 } 130 names()131 const std::vector<std::string>& names() const { return counter_names_; } num_counters()132 size_t num_counters() const { return counter_names_.size(); } 133 134 private: PerfCounters(const std::vector<std::string> & counter_names,std::vector<int> && counter_ids,std::vector<int> && leader_ids)135 PerfCounters(const std::vector<std::string>& counter_names, 136 std::vector<int>&& counter_ids, std::vector<int>&& leader_ids) 137 : counter_ids_(std::move(counter_ids)), 138 leader_ids_(std::move(leader_ids)), 139 counter_names_(counter_names) {} 140 141 void CloseCounters() const; 142 143 std::vector<int> counter_ids_; 144 std::vector<int> leader_ids_; 145 std::vector<std::string> counter_names_; 146 }; 147 148 // Typical usage of the above primitives. 149 class BENCHMARK_EXPORT PerfCountersMeasurement final { 150 public: 151 PerfCountersMeasurement(const std::vector<std::string>& counter_names); 152 num_counters()153 size_t num_counters() const { return counters_.num_counters(); } 154 names()155 std::vector<std::string> names() const { return counters_.names(); } 156 Start()157 BENCHMARK_ALWAYS_INLINE bool Start() { 158 if (num_counters() == 0) return true; 159 // Tell the compiler to not move instructions above/below where we take 160 // the snapshot. 161 ClobberMemory(); 162 valid_read_ &= counters_.Snapshot(&start_values_); 163 ClobberMemory(); 164 165 return valid_read_; 166 } 167 Stop(std::vector<std::pair<std::string,double>> & measurements)168 BENCHMARK_ALWAYS_INLINE bool Stop( 169 std::vector<std::pair<std::string, double>>& measurements) { 170 if (num_counters() == 0) return true; 171 // Tell the compiler to not move instructions above/below where we take 172 // the snapshot. 173 ClobberMemory(); 174 valid_read_ &= counters_.Snapshot(&end_values_); 175 ClobberMemory(); 176 177 for (size_t i = 0; i < counters_.names().size(); ++i) { 178 double measurement = static_cast<double>(end_values_[i]) - 179 static_cast<double>(start_values_[i]); 180 measurements.push_back({counters_.names()[i], measurement}); 181 } 182 183 return valid_read_; 184 } 185 186 private: 187 PerfCounters counters_; 188 bool valid_read_ = true; 189 PerfCounterValues start_values_; 190 PerfCounterValues end_values_; 191 }; 192 193 } // namespace internal 194 } // namespace benchmark 195 196 #if defined(_MSC_VER) 197 #pragma warning(pop) 198 #endif 199 200 #endif // BENCHMARK_PERF_COUNTERS_H 201