/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Profiler.h"

#include <stdlib.h>
#include <string.h>

#if !defined(WIN32)
#   include <unistd.h>
#else
#   include <io.h>
#   define close _close
#endif

#include <algorithm>
#include <iterator>
#include <memory>

#if defined(__linux__)

#include <sys/syscall.h>
#include <linux/perf_event.h>   // perf_event_attr, PERF_TYPE_*, PERF_COUNT_*, PERF_FORMAT_*

#ifdef __ARM_ARCH
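// Raw ARMv8 PMUv3 architectural event numbers. On ARM builds the L1 instruction-cache
// events below are opened with PERF_TYPE_RAW instead of the generic PERF_TYPE_HW_CACHE
// encoding; the L2 events are listed for reference but not used in this file.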
enum ARMv8PmuPerfTypes {
    // Common micro-architecture events
    ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL    = 0x01,
    ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS    = 0x14,
    ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS     = 0x16,
    ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL     = 0x17,
    ARMV8_PMUV3_PERFCTR_L2_CACHE_WB         = 0x18,
};
#endif

#include <android/log.h>
#include <errno.h>

#define LOG_TAG "Benchmark"

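// glibc and bionic do not provide a wrapper for perf_event_open(2), so the raw syscall
// is used. Each call returns a file descriptor for one hardware counter; counters opened
// with group_fd pointing at a leader are scheduled and read together as a group.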
static int perf_event_open(perf_event_attr *hw_event, pid_t pid,
                           int cpu, int group_fd, unsigned long flags) {
    return (int) syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}

#endif // __linux__

namespace utils {

    Profiler::Profiler() noexcept {
        std::uninitialized_fill(std::begin(mCountersFd), std::end(mCountersFd), -1);
    }

    Profiler::Profiler(uint32_t eventMask) noexcept : Profiler() {
        Profiler::resetEvents(eventMask);
    }

    Profiler::~Profiler() noexcept {
#pragma nounroll
        for (int fd: mCountersFd) {
            if (fd >= 0) {
                close(fd);
            }
        }
    }

    uint32_t Profiler::resetEvents(uint32_t eventMask) noexcept {
        // close all counters
#pragma nounroll
        for (int &fd: mCountersFd) {
            if (fd >= 0) {
                close(fd);
                fd = -1;
            }
        }
        mEnabledEvents = 0;

#if defined(__linux__)

        perf_event_attr pe{};
        pe.type = PERF_TYPE_HARDWARE;
        pe.size = sizeof(perf_event_attr);
        pe.config = PERF_COUNT_HW_INSTRUCTIONS;
        pe.disabled = 1;
        pe.exclude_kernel = 1;
        pe.exclude_hv = 1;
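        // The counters are created disabled and count user space only
        // (exclude_kernel/exclude_hv). They are presumably enabled later, e.g. via
        // ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) on the group leader.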
        pe.read_format = PERF_FORMAT_GROUP |
                         PERF_FORMAT_ID |
                         PERF_FORMAT_TOTAL_TIME_ENABLED |
                         PERF_FORMAT_TOTAL_TIME_RUNNING;
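
        // With this read_format, a single read() on the group leader returns every counter
        // at once, laid out as documented in perf_event_open(2):
        //     struct read_format {
        //         u64 nr;            // number of events in the group
        //         u64 time_enabled;  // PERF_FORMAT_TOTAL_TIME_ENABLED
        //         u64 time_running;  // PERF_FORMAT_TOTAL_TIME_RUNNING
        //         struct { u64 value; u64 id; } values[nr];  // one entry per event (PERF_FORMAT_ID)
        //     };
        // Profiler::Counters (declared in Profiler.h) is assumed to mirror this layout.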

        int fd = perf_event_open(&pe, 0, -1, -1, 0);
        if (fd == -1) {
            __android_log_print(
                    ANDROID_LOG_ERROR,
                    LOG_TAG,
                    "perf_event_open failed: [%d]%s",
                    errno,
                    strerror(errno)
            );
            exit(EXIT_FAILURE);
        }

        uint8_t count = 0;
        if (fd >= 0) {
            const int groupFd = fd;
            mIds[INSTRUCTIONS] = count++;
            mCountersFd[INSTRUCTIONS] = fd;

            pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
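
            // The remaining events pass groupFd so they join the leader's group and are
            // read through the leader; the time fields are only reported by the leader,
            // hence the reduced read_format here.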

            if (eventMask & EV_CPU_CYCLES) {
                pe.type = PERF_TYPE_HARDWARE;
                pe.config = PERF_COUNT_HW_CPU_CYCLES;
                mCountersFd[CPU_CYCLES] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[CPU_CYCLES] > 0) {
                    mIds[CPU_CYCLES] = count++;
                    mEnabledEvents |= EV_CPU_CYCLES;
                }
            }

            if (eventMask & EV_L1D_REFS) {
                pe.type = PERF_TYPE_HARDWARE;
                pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
                mCountersFd[DCACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[DCACHE_REFS] > 0) {
                    mIds[DCACHE_REFS] = count++;
                    mEnabledEvents |= EV_L1D_REFS;
                }
            }

            if (eventMask & EV_L1D_MISSES) {
                pe.type = PERF_TYPE_HARDWARE;
                pe.config = PERF_COUNT_HW_CACHE_MISSES;
                mCountersFd[DCACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[DCACHE_MISSES] > 0) {
                    mIds[DCACHE_MISSES] = count++;
                    mEnabledEvents |= EV_L1D_MISSES;
                }
            }

            if (eventMask & EV_BPU_REFS) {
                pe.type = PERF_TYPE_HARDWARE;
                pe.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
                mCountersFd[BRANCHES] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[BRANCHES] > 0) {
                    mIds[BRANCHES] = count++;
                    mEnabledEvents |= EV_BPU_REFS;
                }
            }

            if (eventMask & EV_BPU_MISSES) {
                pe.type = PERF_TYPE_HARDWARE;
                pe.config = PERF_COUNT_HW_BRANCH_MISSES;
                mCountersFd[BRANCH_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[BRANCH_MISSES] > 0) {
                    mIds[BRANCH_MISSES] = count++;
                    mEnabledEvents |= EV_BPU_MISSES;
                }
            }

#ifdef __ARM_ARCH
            if (eventMask & EV_L1I_REFS) {
                pe.type = PERF_TYPE_RAW;
                pe.config = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS;
                mCountersFd[ICACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[ICACHE_REFS] > 0) {
                    mIds[ICACHE_REFS] = count++;
                    mEnabledEvents |= EV_L1I_REFS;
                }
            }

            if (eventMask & EV_L1I_MISSES) {
                pe.type = PERF_TYPE_RAW;
                pe.config = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL;
                mCountersFd[ICACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[ICACHE_MISSES] > 0) {
                    mIds[ICACHE_MISSES] = count++;
                    mEnabledEvents |= EV_L1I_MISSES;
                }
            }
#else
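            // For PERF_TYPE_HW_CACHE, the config is encoded as documented in
            // perf_event_open(2): (cache id) | (op id << 8) | (result id << 16).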
            if (eventMask & EV_L1I_REFS) {
                pe.type = PERF_TYPE_HW_CACHE;
                pe.config = PERF_COUNT_HW_CACHE_L1I |
                            (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                            (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);
                mCountersFd[ICACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[ICACHE_REFS] > 0) {
                    mIds[ICACHE_REFS] = count++;
                    mEnabledEvents |= EV_L1I_REFS;
                }
            }

            if (eventMask & EV_L1I_MISSES) {
                pe.type = PERF_TYPE_HW_CACHE;
                pe.config = PERF_COUNT_HW_CACHE_L1I |
                            (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                            (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
                mCountersFd[ICACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
                if (mCountersFd[ICACHE_MISSES] > 0) {
                    mIds[ICACHE_MISSES] = count++;
                    mEnabledEvents |= EV_L1I_MISSES;
                }
            }
#endif
        }
#endif // __linux__
        return mEnabledEvents;
    }

#if defined(__linux__)

    Profiler::Counters Profiler::readCounters() noexcept {
        Counters outCounters{};
        Counters counters; // NOLINT
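        // A single read() on mCountersFd[0] (presumably the group leader opened first in
        // resetEvents()) fills the whole group in the read_format layout set up there.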
        ssize_t n = read(mCountersFd[0], &counters, sizeof(Counters));
        if (n == -1) {
            __android_log_print(
                    ANDROID_LOG_ERROR,
                    LOG_TAG,
                    "read failed: [%d]%s",
                    errno,
                    strerror(errno)
            );
            exit(EXIT_FAILURE);
        }
        if (n > 0) {
            outCounters.nr = counters.nr;
            outCounters.time_enabled = counters.time_enabled;
            outCounters.time_running = counters.time_running;
            for (size_t i = 0; i < size_t(EVENT_COUNT); i++) {
                // In theory we should check that mCountersFd[i] >= 0, but we don't, to avoid
                // a branch; mIds[] is initialized such that we never index past the counters array.
                outCounters.counters[i] = counters.counters[mIds[i]];
            }
        }
        return outCounters;
    }

#endif // __linux__

} // namespace utils