• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "perf_counters.h"
16 
17 #include <cstring>
18 #include <memory>
19 #include <vector>
20 
21 #if defined HAVE_LIBPFM
22 #include "perfmon/pfmlib.h"
23 #include "perfmon/pfmlib_perf_event.h"
24 #endif
25 
26 namespace benchmark {
27 namespace internal {
28 
29 constexpr size_t PerfCounterValues::kMaxCounters;
30 
31 #if defined HAVE_LIBPFM
32 
Read(const std::vector<int> & leaders)33 size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
34   // Create a pointer for multiple reads
35   const size_t bufsize = values_.size() * sizeof(values_[0]);
36   char* ptr = reinterpret_cast<char*>(values_.data());
37   size_t size = bufsize;
38   for (int lead : leaders) {
39     auto read_bytes = ::read(lead, ptr, size);
40     if (read_bytes >= ssize_t(sizeof(uint64_t))) {
41       // Actual data bytes are all bytes minus initial padding
42       std::size_t data_bytes =
43           static_cast<std::size_t>(read_bytes) - sizeof(uint64_t);
44       // This should be very cheap since it's in hot cache
45       std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
46       // Increment our counters
47       ptr += data_bytes;
48       size -= data_bytes;
49     } else {
50       int err = errno;
51       GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
52                             << " " << ::strerror(err) << "\n";
53       return 0;
54     }
55   }
56   return (bufsize - size) / sizeof(uint64_t);
57 }
58 
59 const bool PerfCounters::kSupported = true;
60 
61 // Initializes libpfm only on the first call.  Returns whether that single
62 // initialization was successful.
Initialize()63 bool PerfCounters::Initialize() {
64   // Function-scope static gets initialized only once on first call.
65   static const bool success = []() {
66     return pfm_initialize() == PFM_SUCCESS;
67   }();
68   return success;
69 }
70 
IsCounterSupported(const std::string & name)71 bool PerfCounters::IsCounterSupported(const std::string& name) {
72   Initialize();
73   perf_event_attr_t attr;
74   std::memset(&attr, 0, sizeof(attr));
75   pfm_perf_encode_arg_t arg;
76   std::memset(&arg, 0, sizeof(arg));
77   arg.attr = &attr;
78   const int mode = PFM_PLM3;  // user mode only
79   int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
80                                       &arg);
81   return (ret == PFM_SUCCESS);
82 }
83 
Create(const std::vector<std::string> & counter_names)84 PerfCounters PerfCounters::Create(
85     const std::vector<std::string>& counter_names) {
86   if (!counter_names.empty()) {
87     Initialize();
88   }
89 
90   // Valid counters will populate these arrays but we start empty
91   std::vector<std::string> valid_names;
92   std::vector<int> counter_ids;
93   std::vector<int> leader_ids;
94 
95   // Resize to the maximum possible
96   valid_names.reserve(counter_names.size());
97   counter_ids.reserve(counter_names.size());
98 
99   const int kCounterMode = PFM_PLM3;  // user mode only
100 
101   // Group leads will be assigned on demand. The idea is that once we cannot
102   // create a counter descriptor, the reason is that this group has maxed out
103   // so we set the group_id again to -1 and retry - giving the algorithm a
104   // chance to create a new group leader to hold the next set of counters.
105   int group_id = -1;
106 
107   // Loop through all performance counters
108   for (size_t i = 0; i < counter_names.size(); ++i) {
109     // we are about to push into the valid names vector
110     // check if we did not reach the maximum
111     if (valid_names.size() == PerfCounterValues::kMaxCounters) {
112       // Log a message if we maxed out and stop adding
113       GetErrorLogInstance()
114           << counter_names.size() << " counters were requested. The maximum is "
115           << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
116           << " were already added. All remaining counters will be ignored\n";
117       // stop the loop and return what we have already
118       break;
119     }
120 
121     // Check if this name is empty
122     const auto& name = counter_names[i];
123     if (name.empty()) {
124       GetErrorLogInstance()
125           << "A performance counter name was the empty string\n";
126       continue;
127     }
128 
129     // Here first means first in group, ie the group leader
130     const bool is_first = (group_id < 0);
131 
132     // This struct will be populated by libpfm from the counter string
133     // and then fed into the syscall perf_event_open
134     struct perf_event_attr attr {};
135     attr.size = sizeof(attr);
136 
137     // This is the input struct to libpfm.
138     pfm_perf_encode_arg_t arg{};
139     arg.attr = &attr;
140     const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
141                                                   PFM_OS_PERF_EVENT, &arg);
142     if (pfm_get != PFM_SUCCESS) {
143       GetErrorLogInstance()
144           << "Unknown performance counter name: " << name << "\n";
145       continue;
146     }
147 
148     // We then proceed to populate the remaining fields in our attribute struct
149     // Note: the man page for perf_event_create suggests inherit = true and
150     // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
151     // case.
152     attr.disabled = is_first;
153     attr.inherit = true;
154     attr.pinned = is_first;
155     attr.exclude_kernel = true;
156     attr.exclude_user = false;
157     attr.exclude_hv = true;
158 
159     // Read all counters in a group in one read.
160     attr.read_format = PERF_FORMAT_GROUP;
161 
162     int id = -1;
163     while (id < 0) {
164       static constexpr size_t kNrOfSyscallRetries = 5;
165       // Retry syscall as it was interrupted often (b/64774091).
166       for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
167            ++num_retries) {
168         id = perf_event_open(&attr, 0, -1, group_id, 0);
169         if (id >= 0 || errno != EINTR) {
170           break;
171         }
172       }
173       if (id < 0) {
174         // If the file descriptor is negative we might have reached a limit
175         // in the current group. Set the group_id to -1 and retry
176         if (group_id >= 0) {
177           // Create a new group
178           group_id = -1;
179         } else {
180           // At this point we have already retried to set a new group id and
181           // failed. We then give up.
182           break;
183         }
184       }
185     }
186 
187     // We failed to get a new file descriptor. We might have reached a hard
188     // hardware limit that cannot be resolved even with group multiplexing
189     if (id < 0) {
190       GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
191                                "for performance counter "
192                             << name << ". Ignoring\n";
193 
194       // We give up on this counter but try to keep going
195       // as the others would be fine
196       continue;
197     }
198     if (group_id < 0) {
199       // This is a leader, store and assign it to the current file descriptor
200       leader_ids.push_back(id);
201       group_id = id;
202     }
203     // This is a valid counter, add it to our descriptor's list
204     counter_ids.push_back(id);
205     valid_names.push_back(name);
206   }
207 
208   // Loop through all group leaders activating them
209   // There is another option of starting ALL counters in a process but
210   // that would be far reaching an intrusion. If the user is using PMCs
211   // by themselves then this would have a side effect on them. It is
212   // friendlier to loop through all groups individually.
213   for (int lead : leader_ids) {
214     if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
215       // This should never happen but if it does, we give up on the
216       // entire batch as recovery would be a mess.
217       GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
218                                "Claring out all counters.\n";
219 
220       // Close all peformance counters
221       for (int id : counter_ids) {
222         ::close(id);
223       }
224 
225       // Return an empty object so our internal state is still good and
226       // the process can continue normally without impact
227       return NoCounters();
228     }
229   }
230 
231   return PerfCounters(std::move(valid_names), std::move(counter_ids),
232                       std::move(leader_ids));
233 }
234 
CloseCounters() const235 void PerfCounters::CloseCounters() const {
236   if (counter_ids_.empty()) {
237     return;
238   }
239   for (int lead : leader_ids_) {
240     ioctl(lead, PERF_EVENT_IOC_DISABLE);
241   }
242   for (int fd : counter_ids_) {
243     close(fd);
244   }
245 }
246 #else   // defined HAVE_LIBPFM
Read(const std::vector<int> &)247 size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
248 
249 const bool PerfCounters::kSupported = false;
250 
Initialize()251 bool PerfCounters::Initialize() { return false; }
252 
IsCounterSupported(const std::string &)253 bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
254 
Create(const std::vector<std::string> & counter_names)255 PerfCounters PerfCounters::Create(
256     const std::vector<std::string>& counter_names) {
257   if (!counter_names.empty()) {
258     GetErrorLogInstance() << "Performance counters not supported.\n";
259   }
260   return NoCounters();
261 }
262 
CloseCounters() const263 void PerfCounters::CloseCounters() const {}
264 #endif  // defined HAVE_LIBPFM
265 
PerfCountersMeasurement(const std::vector<std::string> & counter_names)266 PerfCountersMeasurement::PerfCountersMeasurement(
267     const std::vector<std::string>& counter_names)
268     : start_values_(counter_names.size()), end_values_(counter_names.size()) {
269   counters_ = PerfCounters::Create(counter_names);
270 }
271 
operator =(PerfCounters && other)272 PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
273   if (this != &other) {
274     CloseCounters();
275 
276     counter_ids_ = std::move(other.counter_ids_);
277     leader_ids_ = std::move(other.leader_ids_);
278     counter_names_ = std::move(other.counter_names_);
279   }
280   return *this;
281 }
282 }  // namespace internal
283 }  // namespace benchmark
284