1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef TNT_UTILS_PROFILER_H 18 #define TNT_UTILS_PROFILER_H 19 20 #include <assert.h> 21 #include <stdint.h> 22 #include <string.h> 23 24 #include <chrono> // note: This is safe (only used inline) 25 26 #if defined(__linux__) 27 # include <unistd.h> 28 # include <sys/ioctl.h> 29 # include <linux/perf_event.h> 30 #endif 31 32 #include "compiler.h" 33 34 namespace utils { 35 36 class Profiler { 37 public: 38 enum { 39 INSTRUCTIONS = 0, // must be zero 40 CPU_CYCLES = 1, 41 DCACHE_REFS = 2, 42 DCACHE_MISSES = 3, 43 BRANCHES = 4, 44 BRANCH_MISSES = 5, 45 ICACHE_REFS = 6, 46 ICACHE_MISSES = 7, 47 48 // Must be last one 49 EVENT_COUNT 50 }; 51 52 enum { 53 EV_CPU_CYCLES = 1u << CPU_CYCLES, 54 EV_L1D_REFS = 1u << DCACHE_REFS, 55 EV_L1D_MISSES = 1u << DCACHE_MISSES, 56 EV_BPU_REFS = 1u << BRANCHES, 57 EV_BPU_MISSES = 1u << BRANCH_MISSES, 58 EV_L1I_REFS = 1u << ICACHE_REFS, 59 EV_L1I_MISSES = 1u << ICACHE_MISSES, 60 // helpers 61 EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES, 62 EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES, 63 EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES, 64 }; 65 66 Profiler() noexcept; // must call resetEvents() 67 explicit Profiler(uint32_t eventMask) noexcept; 68 ~Profiler() noexcept; 69 70 Profiler(const Profiler& rhs) = delete; 71 Profiler(Profiler&& rhs) = delete; 72 Profiler& operator=(const Profiler& rhs) = delete; 73 Profiler& operator=(Profiler&& rhs) = delete; 74 75 // selects which events are enabled. 76 uint32_t resetEvents(uint32_t eventMask) noexcept; 77 getEnabledEvents()78 uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; } 79 80 // could return false if performance counters are not supported/enabled isValid()81 bool isValid() const { return mCountersFd[0] >= 0; } 82 83 class Counters { 84 friend class Profiler; 85 uint64_t nr; 86 uint64_t time_enabled; 87 uint64_t time_running; 88 struct { 89 uint64_t value; 90 uint64_t id; 91 } counters[Profiler::EVENT_COUNT]; 92 93 friend Counters operator-(Counters lhs, const Counters& rhs) noexcept { 94 lhs.nr -= rhs.nr; 95 lhs.time_enabled -= rhs.time_enabled; 96 lhs.time_running -= rhs.time_running; 97 for (size_t i = 0; i < EVENT_COUNT; ++i) { 98 lhs.counters[i].value -= rhs.counters[i].value; 99 } 100 return lhs; 101 } 102 103 public: getInstructions()104 uint64_t getInstructions() const { return counters[INSTRUCTIONS].value; } getCpuCycles()105 uint64_t getCpuCycles() const { return counters[CPU_CYCLES].value; } getL1DReferences()106 uint64_t getL1DReferences() const { return counters[DCACHE_REFS].value; } getL1DMisses()107 uint64_t getL1DMisses() const { return counters[DCACHE_MISSES].value; } getL1IReferences()108 uint64_t getL1IReferences() const { return counters[ICACHE_REFS].value; } getL1IMisses()109 uint64_t getL1IMisses() const { return counters[ICACHE_MISSES].value; } getBranchInstructions()110 uint64_t getBranchInstructions() const { return counters[BRANCHES].value; } getBranchMisses()111 uint64_t getBranchMisses() const { return counters[BRANCH_MISSES].value; } 112 getWallTime()113 std::chrono::duration<uint64_t, std::nano> getWallTime() const { 114 return std::chrono::duration<uint64_t, std::nano>(time_enabled); 115 } 116 getRunningTime()117 std::chrono::duration<uint64_t, std::nano> getRunningTime() const { 118 return std::chrono::duration<uint64_t, std::nano>(time_running); 119 } 120 getIPC()121 double getIPC() const noexcept { 122 uint64_t cpuCycles = getCpuCycles(); 123 uint64_t instructions = getInstructions(); 124 return double(instructions) / double(cpuCycles); 125 } 126 getCPI()127 double getCPI() const noexcept { 128 uint64_t cpuCycles = getCpuCycles(); 129 uint64_t instructions = getInstructions(); 130 return double(cpuCycles) / double(instructions); 131 } 132 getL1DMissRate()133 double getL1DMissRate() const noexcept { 134 uint64_t cacheReferences = getL1DReferences(); 135 uint64_t cacheMisses = getL1DMisses(); 136 return double(cacheMisses) / double(cacheReferences); 137 } 138 getL1DHitRate()139 double getL1DHitRate() const noexcept { 140 return 1.0 - getL1DMissRate(); 141 } 142 getL1IMissRate()143 double getL1IMissRate() const noexcept { 144 uint64_t cacheReferences = getL1IReferences(); 145 uint64_t cacheMisses = getL1IMisses(); 146 return double(cacheMisses) / double(cacheReferences); 147 } 148 getL1IHitRate()149 double getL1IHitRate() const noexcept { 150 return 1.0 - getL1IMissRate(); 151 } 152 getBranchMissRate()153 double getBranchMissRate() const noexcept { 154 uint64_t branchReferences = getBranchInstructions(); 155 uint64_t branchMisses = getBranchMisses(); 156 return double(branchMisses) / double(branchReferences); 157 } 158 getBranchHitRate()159 double getBranchHitRate() const noexcept { 160 return 1.0 - getBranchMissRate(); 161 } 162 getMPKI(uint64_t misses)163 double getMPKI(uint64_t misses) const noexcept { 164 return (misses * 1000.0) / getInstructions(); 165 } 166 }; 167 168 #if defined(__linux__) 169 reset()170 void reset() noexcept { 171 int fd = mCountersFd[0]; 172 ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); 173 } 174 start()175 void start() noexcept { 176 int fd = mCountersFd[0]; 177 ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); 178 } 179 stop()180 void stop() noexcept { 181 int fd = mCountersFd[0]; 182 ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); 183 } 184 185 Counters readCounters() noexcept; 186 187 #else // !__linux__ 188 reset()189 void reset() noexcept { } start()190 void start() noexcept { } stop()191 void stop() noexcept { } readCounters()192 Counters readCounters() noexcept { return {}; } 193 194 #endif // __linux__ 195 hasBranchRates()196 bool hasBranchRates() const noexcept { 197 return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0); 198 } 199 hasICacheRates()200 bool hasICacheRates() const noexcept { 201 return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0); 202 } 203 204 private: 205 UTILS_UNUSED uint8_t mIds[EVENT_COUNT] = {}; 206 int mCountersFd[EVENT_COUNT]; 207 uint32_t mEnabledEvents = 0; 208 }; 209 210 } // namespace utils 211 212 #endif // TNT_UTILS_PROFILER_H 213