1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef TNT_UTILS_PROFILER_H
18 #define TNT_UTILS_PROFILER_H
19 
20 #include <assert.h>
21 #include <stdint.h>
22 #include <string.h>
23 
24 #include <chrono>   // note: This is safe (only used inline)
25 
26 #if defined(__linux__)
27 #   include <unistd.h>
28 #   include <sys/ioctl.h>
29 #   include <linux/perf_event.h>
30 #endif
31 
32 #include "compiler.h"
33 
34 namespace utils {
35 
36     class Profiler {
37     public:
38         enum {
39             INSTRUCTIONS    = 0,   // must be zero
40             CPU_CYCLES      = 1,
41             DCACHE_REFS     = 2,
42             DCACHE_MISSES   = 3,
43             BRANCHES        = 4,
44             BRANCH_MISSES   = 5,
45             ICACHE_REFS     = 6,
46             ICACHE_MISSES   = 7,
47 
48             // Must be last one
49             EVENT_COUNT
50         };
51 
52         enum {
53             EV_CPU_CYCLES = 1u << CPU_CYCLES,
54             EV_L1D_REFS   = 1u << DCACHE_REFS,
55             EV_L1D_MISSES = 1u << DCACHE_MISSES,
56             EV_BPU_REFS   = 1u << BRANCHES,
57             EV_BPU_MISSES = 1u << BRANCH_MISSES,
58             EV_L1I_REFS   = 1u << ICACHE_REFS,
59             EV_L1I_MISSES = 1u << ICACHE_MISSES,
60             // helpers
61             EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES,
62             EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES,
63             EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES,
64         };
65 
66         Profiler() noexcept; // must call resetEvents()
67         explicit Profiler(uint32_t eventMask) noexcept;
68         ~Profiler() noexcept;
69 
70         Profiler(const Profiler& rhs) = delete;
71         Profiler(Profiler&& rhs) = delete;
72         Profiler& operator=(const Profiler& rhs) = delete;
73         Profiler& operator=(Profiler&& rhs) = delete;
74 
75         // selects which events are enabled.
76         uint32_t resetEvents(uint32_t eventMask) noexcept;
77 
getEnabledEvents()78         uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; }
79 
80         // could return false if performance counters are not supported/enabled
isValid()81         bool isValid() const { return mCountersFd[0] >= 0; }
82 
83         class Counters {
84             friend class Profiler;
85             uint64_t nr;
86             uint64_t time_enabled;
87             uint64_t time_running;
88             struct {
89                 uint64_t value;
90                 uint64_t id;
91             } counters[Profiler::EVENT_COUNT];
92 
93             friend Counters operator-(Counters lhs, const Counters& rhs) noexcept {
94                 lhs.nr -= rhs.nr;
95                 lhs.time_enabled -= rhs.time_enabled;
96                 lhs.time_running -= rhs.time_running;
97                 for (size_t i = 0; i < EVENT_COUNT; ++i) {
98                     lhs.counters[i].value -= rhs.counters[i].value;
99                 }
100                 return lhs;
101             }
102 
103         public:
getInstructions()104             uint64_t getInstructions() const        { return counters[INSTRUCTIONS].value; }
getCpuCycles()105             uint64_t getCpuCycles() const           { return counters[CPU_CYCLES].value; }
getL1DReferences()106             uint64_t getL1DReferences() const       { return counters[DCACHE_REFS].value; }
getL1DMisses()107             uint64_t getL1DMisses() const           { return counters[DCACHE_MISSES].value; }
getL1IReferences()108             uint64_t getL1IReferences() const       { return counters[ICACHE_REFS].value; }
getL1IMisses()109             uint64_t getL1IMisses() const           { return counters[ICACHE_MISSES].value; }
getBranchInstructions()110             uint64_t getBranchInstructions() const  { return counters[BRANCHES].value; }
getBranchMisses()111             uint64_t getBranchMisses() const        { return counters[BRANCH_MISSES].value; }
112 
getWallTime()113             std::chrono::duration<uint64_t, std::nano> getWallTime() const {
114                 return std::chrono::duration<uint64_t, std::nano>(time_enabled);
115             }
116 
getRunningTime()117             std::chrono::duration<uint64_t, std::nano> getRunningTime() const {
118                 return std::chrono::duration<uint64_t, std::nano>(time_running);
119             }
120 
getIPC()121             double getIPC() const noexcept {
122                 uint64_t cpuCycles = getCpuCycles();
123                 uint64_t instructions = getInstructions();
124                 return double(instructions) / double(cpuCycles);
125             }
126 
getCPI()127             double getCPI() const noexcept {
128                 uint64_t cpuCycles = getCpuCycles();
129                 uint64_t instructions = getInstructions();
130                 return double(cpuCycles) / double(instructions);
131             }
132 
getL1DMissRate()133             double getL1DMissRate() const noexcept {
134                 uint64_t cacheReferences = getL1DReferences();
135                 uint64_t cacheMisses = getL1DMisses();
136                 return double(cacheMisses) / double(cacheReferences);
137             }
138 
getL1DHitRate()139             double getL1DHitRate() const noexcept {
140                 return 1.0 - getL1DMissRate();
141             }
142 
getL1IMissRate()143             double getL1IMissRate() const noexcept {
144                 uint64_t cacheReferences = getL1IReferences();
145                 uint64_t cacheMisses = getL1IMisses();
146                 return double(cacheMisses) / double(cacheReferences);
147             }
148 
getL1IHitRate()149             double getL1IHitRate() const noexcept {
150                 return 1.0 - getL1IMissRate();
151             }
152 
getBranchMissRate()153             double getBranchMissRate() const noexcept {
154                 uint64_t branchReferences = getBranchInstructions();
155                 uint64_t branchMisses = getBranchMisses();
156                 return double(branchMisses) / double(branchReferences);
157             }
158 
getBranchHitRate()159             double getBranchHitRate() const noexcept {
160                 return 1.0 - getBranchMissRate();
161             }
162 
getMPKI(uint64_t misses)163             double getMPKI(uint64_t misses) const noexcept {
164                 return (misses * 1000.0) / getInstructions();
165             }
166         };
167 
168 #if defined(__linux__)
169 
reset()170         void reset() noexcept {
171         int fd = mCountersFd[0];
172         ioctl(fd, PERF_EVENT_IOC_RESET,  PERF_IOC_FLAG_GROUP);
173     }
174 
start()175     void start() noexcept {
176         int fd = mCountersFd[0];
177         ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
178     }
179 
stop()180     void stop() noexcept {
181         int fd = mCountersFd[0];
182         ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
183     }
184 
185     Counters readCounters() noexcept;
186 
187 #else // !__linux__
188 
reset()189         void reset() noexcept { }
start()190         void start() noexcept { }
stop()191         void stop() noexcept { }
readCounters()192         Counters readCounters() noexcept { return {}; }
193 
194 #endif // __linux__
195 
hasBranchRates()196         bool hasBranchRates() const noexcept {
197             return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0);
198         }
199 
hasICacheRates()200         bool hasICacheRates() const noexcept {
201             return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0);
202         }
203 
204     private:
205         UTILS_UNUSED uint8_t mIds[EVENT_COUNT] = {};
206         int mCountersFd[EVENT_COUNT];
207         uint32_t mEnabledEvents = 0;
208     };
209 
210 } // namespace utils
211 
212 #endif // TNT_UTILS_PROFILER_H
213