1 // Copyright 2021 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef BENCHMARK_PERF_COUNTERS_H 16 #define BENCHMARK_PERF_COUNTERS_H 17 18 #include <array> 19 #include <cstdint> 20 #include <vector> 21 22 #include "benchmark/benchmark.h" 23 #include "check.h" 24 #include "log.h" 25 26 #ifndef BENCHMARK_OS_WINDOWS 27 #include <unistd.h> 28 #endif 29 30 namespace benchmark { 31 namespace internal { 32 33 // Typically, we can only read a small number of counters. There is also a 34 // padding preceding counter values, when reading multiple counters with one 35 // syscall (which is desirable). PerfCounterValues abstracts these details. 36 // The implementation ensures the storage is inlined, and allows 0-based 37 // indexing into the counter values. 38 // The object is used in conjunction with a PerfCounters object, by passing it 39 // to Snapshot(). The values are populated such that 40 // perfCounters->names()[i]'s value is obtained at position i (as given by 41 // operator[]) of this object. 42 class PerfCounterValues { 43 public: PerfCounterValues(size_t nr_counters)44 explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { 45 BM_CHECK_LE(nr_counters_, kMaxCounters); 46 } 47 48 uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; } 49 50 static constexpr size_t kMaxCounters = 3; 51 52 private: 53 friend class PerfCounters; 54 // Get the byte buffer in which perf counters can be captured. 55 // This is used by PerfCounters::Read get_data_buffer()56 std::pair<char*, size_t> get_data_buffer() { 57 return {reinterpret_cast<char*>(values_.data()), 58 sizeof(uint64_t) * (kPadding + nr_counters_)}; 59 } 60 61 static constexpr size_t kPadding = 1; 62 std::array<uint64_t, kPadding + kMaxCounters> values_; 63 const size_t nr_counters_; 64 }; 65 66 // Collect PMU counters. The object, once constructed, is ready to be used by 67 // calling read(). PMU counter collection is enabled from the time create() is 68 // called, to obtain the object, until the object's destructor is called. 69 class PerfCounters final { 70 public: 71 // True iff this platform supports performance counters. 72 static const bool kSupported; 73 IsValid()74 bool IsValid() const { return is_valid_; } NoCounters()75 static PerfCounters NoCounters() { return PerfCounters(); } 76 77 ~PerfCounters(); 78 PerfCounters(PerfCounters&&) = default; 79 PerfCounters(const PerfCounters&) = delete; 80 81 // Platform-specific implementations may choose to do some library 82 // initialization here. 83 static bool Initialize(); 84 85 // Return a PerfCounters object ready to read the counters with the names 86 // specified. The values are user-mode only. The counter name format is 87 // implementation and OS specific. 88 // TODO: once we move to C++-17, this should be a std::optional, and then the 89 // IsValid() boolean can be dropped. 90 static PerfCounters Create(const std::vector<std::string>& counter_names); 91 92 // Take a snapshot of the current value of the counters into the provided 93 // valid PerfCounterValues storage. The values are populated such that: 94 // names()[i]'s value is (*values)[i] Snapshot(PerfCounterValues * values)95 BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { 96 #ifndef BENCHMARK_OS_WINDOWS 97 assert(values != nullptr); 98 assert(IsValid()); 99 auto buffer = values->get_data_buffer(); 100 auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second); 101 return static_cast<size_t>(read_bytes) == buffer.second; 102 #else 103 (void)values; 104 return false; 105 #endif 106 } 107 names()108 const std::vector<std::string>& names() const { return counter_names_; } num_counters()109 size_t num_counters() const { return counter_names_.size(); } 110 111 private: PerfCounters(const std::vector<std::string> & counter_names,std::vector<int> && counter_ids)112 PerfCounters(const std::vector<std::string>& counter_names, 113 std::vector<int>&& counter_ids) 114 : counter_ids_(std::move(counter_ids)), 115 counter_names_(counter_names), 116 is_valid_(true) {} PerfCounters()117 PerfCounters() : is_valid_(false) {} 118 119 std::vector<int> counter_ids_; 120 const std::vector<std::string> counter_names_; 121 const bool is_valid_; 122 }; 123 124 // Typical usage of the above primitives. 125 class PerfCountersMeasurement final { 126 public: PerfCountersMeasurement(PerfCounters && c)127 PerfCountersMeasurement(PerfCounters&& c) 128 : counters_(std::move(c)), 129 start_values_(counters_.IsValid() ? counters_.names().size() : 0), 130 end_values_(counters_.IsValid() ? counters_.names().size() : 0) {} 131 IsValid()132 bool IsValid() const { return counters_.IsValid(); } 133 Start()134 BENCHMARK_ALWAYS_INLINE void Start() { 135 assert(IsValid()); 136 // Tell the compiler to not move instructions above/below where we take 137 // the snapshot. 138 ClobberMemory(); 139 counters_.Snapshot(&start_values_); 140 ClobberMemory(); 141 } 142 143 BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>> StopAndGetMeasurements()144 StopAndGetMeasurements() { 145 assert(IsValid()); 146 // Tell the compiler to not move instructions above/below where we take 147 // the snapshot. 148 ClobberMemory(); 149 counters_.Snapshot(&end_values_); 150 ClobberMemory(); 151 152 std::vector<std::pair<std::string, double>> ret; 153 for (size_t i = 0; i < counters_.names().size(); ++i) { 154 double measurement = static_cast<double>(end_values_[i]) - 155 static_cast<double>(start_values_[i]); 156 ret.push_back({counters_.names()[i], measurement}); 157 } 158 return ret; 159 } 160 161 private: 162 PerfCounters counters_; 163 PerfCounterValues start_values_; 164 PerfCounterValues end_values_; 165 }; 166 167 BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize(); 168 169 } // namespace internal 170 } // namespace benchmark 171 172 #endif // BENCHMARK_PERF_COUNTERS_H 173