#!/usr/bin/python3

# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Tool to analyze CPU performance from perfetto trace

This tool assumes that core clocks are fixed.
It will not give accurate results if clock frequencies change.
Should install perfetto: $ pip install perfetto
"""

import argparse
import sys

from perfetto.trace_processor import TraceProcessor

from config import get_script_dir as get_script_dir
from config import parse_config as parse_config
from config import add_line_with_indentation

# Get total idle time and active time from each core.
# Rows with utid = 0 are counted as idle time, all other rows as active time.
QUERY_SCHED_CORE_SUM = """SELECT
  cpu AS core,
  SUM (CASE
      WHEN utid = 0 THEN 0
      ELSE dur
  END) AS activeTime,
  SUM (CASE
      WHEN utid = 0 THEN dur
      ELSE 0
  END) AS idleTime
FROM sched
GROUP BY cpu
ORDER BY cpu"""


def _percent(part, whole):
    """Return part / whole as a percentage, or 0.0 when whole is zero.

    Guards the report code against ZeroDivisionError on traces where a core
    (or the whole system) recorded no time at all.
    """
    if not whole:
        return 0.0
    return float(part) / whole * 100.0


class CoreLoad:
    """CPU cycles consumed on a single core."""

    def __init__(self, coreId, totalCycles):
        self.coreId = coreId
        self.totalCycles = totalCycles


class CPUExecutionInfo:
    """Named entity (process or thread) with its per-core cycle usage."""

    def __init__(self, name):
        self.name = name
        self.perCoreLoads = {}  # key: core, value: CoreLoad

    def addCoreLoad(self, load):
        """Register a CoreLoad, replacing any previous entry for that core."""
        self.perCoreLoads[load.coreId] = load

    def getCoreCycles(self, coreId):
        """Return the cycles used on coreId, or 0 if the core was never used."""
        load = self.perCoreLoads.get(coreId)
        if load is None:
            return 0
        return load.totalCycles

    def getTotalCycles(self):
        """Return the cycles used summed over all cores (0 when there are none)."""
        return sum(load.totalCycles for load in self.perCoreLoads.values())


class ThreadInfo(CPUExecutionInfo):
    """CPU usage of a single thread."""

    def print(self, totalCpuCycles, perCoreTotalCycles, loadPercentile):
        """Print the thread's total, active and per-core load percentages.

        Args:
            totalCpuCycles: cycles available on all cores during the trace.
            perCoreTotalCycles: dict mapping core id to the cycles available
                on that core.
            loadPercentile: overall system load in percent; the "active"
                figure is the thread's share of that load.
        """
        indentation = 2
        msgs = []
        totalCpuLoad = _percent(self.getTotalCycles(), totalCpuCycles)
        activeCpuLoad = _percent(totalCpuLoad, loadPercentile)
        # add_line_with_indentation comes from config; it is expected to
        # append the formatted line to msgs.
        add_line_with_indentation(
            msgs,
            "{}: total: {:.3f}% active: {:.3f}%".format(
                self.name, totalCpuLoad, activeCpuLoad),
            indentation)
        add_line_with_indentation(msgs, 50 * "-", indentation)
        for c in sorted(self.perCoreLoads):
            coreLoad = _percent(self.perCoreLoads[c].totalCycles,
                                perCoreTotalCycles.get(c, 0))
            add_line_with_indentation(
                msgs,
                "{:<10} {:<15}".format("Core {}".format(c),
                                       "{:.3f}%".format(coreLoad)),
                indentation)

        print("".join(msgs))


class ProcessInfo(CPUExecutionInfo):
    """CPU usage of a process together with the usage of its threads."""

    def __init__(self, name):
        super().__init__(name)
        self.threads = []  # ThreadInfo

    def get_filtered_threads(self, threadNames):
        """Return the threads whose name contains any of threadNames.

        An empty threadNames matches nothing and yields an empty list.
        """
        return [t for t in self.threads
                if any(filterName in t.name for filterName in threadNames)]

    def print(self, totalCpuCycles, perCoreTotalCycles, loadPercentile, showThreads=False):
        """Print the process's total, active and per-core load percentages.

        Args:
            totalCpuCycles: cycles available on all cores during the trace.
            perCoreTotalCycles: dict mapping core id to the cycles available
                on that core.
            loadPercentile: overall system load in percent.
            showThreads: when True, also print every thread of the process,
                busiest first (this sorts self.threads in place).
        """
        msgs = []
        totalCpuLoad = _percent(self.getTotalCycles(), totalCpuCycles)
        activeCpuLoad = _percent(totalCpuLoad, loadPercentile)
        msgs.append("{}: total: {:.3f}% active: {:.3f}%".format(
            self.name, totalCpuLoad, activeCpuLoad))
        msgs.append("\n" + 50 * "-")
        for c in sorted(self.perCoreLoads):
            coreLoad = _percent(self.perCoreLoads[c].totalCycles,
                                perCoreTotalCycles.get(c, 0))
            msgs.append("\n{:<10} {:<15}".format("Core {}".format(c),
                                                 "{:.3f}%".format(coreLoad)))

        print(''.join(msgs))

        if showThreads:
            self.threads.sort(reverse=True, key=lambda t: t.getTotalCycles())
            for t in self.threads:
                t.print(totalCpuCycles, perCoreTotalCycles, loadPercentile)

        print('\n')


class TotalCoreLoad:
    """Time-based load of one core: active time, idle time and load percentage."""

    def __init__(self, coreId, activeTime, idleTime):
        self.coreId = coreId
        self.activeTime = activeTime
        self.idleTime = idleTime
        # 0.0 when the core recorded no time at all (avoids division by zero)
        self.loadPercentile = _percent(activeTime, idleTime + activeTime)


class SystemLoad:
    """System-wide view: per-core time-based loads plus per-process cycle usage."""

    def __init__(self):
        self.totalLoads = []  # TotalCoreLoad
        self.totalLoad = 0.0
        self.processes = []  # ProcessInfo

    def addTimeMeasurements(self, coreData, allCores):
        """Append one TotalCoreLoad per core to self.totalLoads, ordered by core id.

        Args:
            coreData: iterable of rows exposing core, activeTime and idleTime.
            allCores: ids of all cores that should be reported; cores missing
                from coreData are added as fully idle.
        """
        coreLoads = {}  # k: core, v: TotalCoreLoad
        maxTotalTime = 0
        for entry in coreData:
            totalTime = entry.activeTime + entry.idleTime
            if maxTotalTime < totalTime:
                maxTotalTime = totalTime
            coreLoads[entry.core] = TotalCoreLoad(entry.core, entry.activeTime,
                                                  entry.idleTime)
        for c in allCores:
            if c in coreLoads:
                continue
            # this core was not used at all. So add it with idle only
            coreLoads[c] = TotalCoreLoad(c, 0, maxTotalTime)
        for c in sorted(coreLoads):
            self.totalLoads.append(coreLoads[c])

    def get_filtered_processes(self, process_names):
        """Map each filter name to the processes whose name contains it.

        Filter names without any matching process are left out of the result.
        """
        processPerName = {}
        for name in process_names:
            processes = [p for p in self.processes if name in p.name]
            if processes:
                processPerName[name] = processes
        return processPerName

    def print(self, cpuConfig, numTopN, filterProcesses, filterThreads):
        """Print the full report.

        Sections, in order: time based per-core load, filtered threads (when
        filterThreads is given), filtered processes (when filterProcesses is
        given) and the numTopN busiest processes.

        Args:
            cpuConfig: config object exposing coreMaxFreqKHz indexed by core id.
            numTopN: number of processes listed in the "Top processes" section.
            filterProcesses: list of process name substrings, or None.
            filterThreads: dict mapping a process name substring to a list of
                thread name substrings, or None.
        """
        print("\nTime based CPU load\n" + 30 * "=")
        loadXClkSum = 0.0
        perCoreCpuCycles = {}
        totalCpuCycles = 0
        maxCpuGHz = 0.0
        print("{:<10} {:<15} {:<15} {:<15}\n{}".format(
            "CPU", "CPU Load %", "CPU Usage", "Max CPU Freq.", 60 * "-"))
        for l in self.totalLoads:
            coreMaxFreqGHz = float(cpuConfig.coreMaxFreqKHz[l.coreId]) / 1e6
            coreIdStr = "Core {}".format(l.coreId)
            loadPercentileStr = "{:.3f}%".format(l.loadPercentile)
            loadUsageStr = "{:.3f} GHz".format(l.loadPercentile * coreMaxFreqGHz / 100)
            coreMaxFreqStr = "{:.3f} GHz".format(coreMaxFreqGHz)
            print("{:<10} {:<15} {:<15} {:<15}".format(
                coreIdStr, loadPercentileStr, loadUsageStr, coreMaxFreqStr))
            maxCpuGHz += coreMaxFreqGHz
            loadXClkSum += l.loadPercentile * coreMaxFreqGHz
            # time multiplied by clock frequency gives the cycles available
            perCoreCpuCycles[l.coreId] = (l.activeTime + l.idleTime) * coreMaxFreqGHz
            totalCpuCycles += perCoreCpuCycles[l.coreId]
        # frequency-weighted average load; 0.0 when no core was reported
        loadPercentile = loadXClkSum / maxCpuGHz if maxCpuGHz else 0.0
        print("\nTotal Load: {:.3f}%, {:.2f} GHz with system max {:.2f} GHz".format(
            loadPercentile, loadPercentile * maxCpuGHz / 100.0, maxCpuGHz))

        self.processes.sort(reverse=True, key=lambda p: p.getTotalCycles())
        if filterThreads is not None:
            print("\nFiltered threads\n" + 30 * "=")
            processPerName = self.get_filtered_processes(filterThreads.keys())
            if not processPerName:
                print("No process found matching filters.")
            for name in processPerName:
                for p in processPerName[name]:
                    threads = p.get_filtered_threads(filterThreads[name])
                    print("\n{}\n".format(p.name) + 30 * "-")
                    for t in threads:
                        t.print(totalCpuCycles, perCoreCpuCycles, loadPercentile)

        if filterProcesses is not None:
            print("\nFiltered processes\n" + 30 * "=")
            processPerName = self.get_filtered_processes(filterProcesses)
            if not processPerName:
                print("No process found matching filters.")
            # Flatten the per-filter lists; a process that matches several
            # filters is reported only once.
            processes = []
            seen = set()
            for matched in processPerName.values():
                for p in matched:
                    if id(p) not in seen:
                        seen.add(id(p))
                        processes.append(p)
            processes.sort(reverse=True, key=lambda p: p.getTotalCycles())
            for p in processes:
                p.print(totalCpuCycles, perCoreCpuCycles, loadPercentile, showThreads=True)

        print("\nTop processes\n" + 30 * "=")
        for p in self.processes[:numTopN]:
            p.print(totalCpuCycles, perCoreCpuCycles, loadPercentile)


def init_arguments():
    """Define the command line interface and return the parsed arguments."""
    parser = argparse.ArgumentParser(description='Analyze CPU perf.')
    parser.add_argument('-f', '--configfile', dest='config_file',
                        default=get_script_dir() + '/pixel6.config',
                        type=argparse.FileType('r'),
                        help='CPU config file', )
    parser.add_argument('-c', '--cpusettings', dest='cpusettings', action='store',
                        default='default',
                        help='CPU Settings to apply')
    parser.add_argument('-n', '--number_of_top_processes', dest='number_of_top_processes',
                        action='store', type=int, default=5,
                        help='Number of processes to show in performance report')
    parser.add_argument('-p', '--process-name', dest='process_names', action='append',
                        help='Name of process to filter')
    parser.add_argument('-t', '--thread-name', dest='thread_names', action='append',
                        help='Name of thread to filter. Format: <process-name>:<thread-name>')
    parser.add_argument('trace_file', action='store', nargs=1,
                        help='Perfetto trace file to analyze')
    return parser.parse_args()


def get_core_load(coreData, cpuConfig):
    """Convert one per-core metrics entry into a CoreLoad.

    Uses the average frequency recorded in the trace when it is present and
    falls back to the configured max frequency of the core otherwise.
    """
    if coreData.metrics.HasField('avg_freq_khz'):
        cpuFreqKHz = coreData.metrics.avg_freq_khz
    else:
        cpuFreqKHz = cpuConfig.coreMaxFreqKHz[coreData.id]
    cpuCycles = cpuFreqKHz * coreData.metrics.runtime_ns / 1000000  # unit should be Hz * s
    return CoreLoad(coreData.id, cpuCycles)


def run_analysis(
    traceFile,
    cpuConfig,
    cpuSettings,
    numTopN=5,
    filterProcesses=None,
    filterThreads=None
):
    """Load traceFile, collect per-core and per-process CPU usage and print the report.

    Args:
        traceFile: path of the perfetto trace to analyze.
        cpuConfig: parsed CPU config exposing coreMaxFreqKHz.
        cpuSettings: selected settings entry; its onlines attribute lists the
            cores to report.
        numTopN: number of top processes to print.
        filterProcesses: list of process name substrings, or None.
        filterThreads: dict of process name substring to list of thread name
            substrings, or None.
    """
    tp = TraceProcessor(file_path=traceFile)
    try:
        systemLoad = SystemLoad()
        # get idle and active times per each cores
        core_times = tp.query(QUERY_SCHED_CORE_SUM)
        systemLoad.addTimeMeasurements(core_times, cpuSettings.onlines)

        cpu_metrics = tp.metric(['android_cpu']).android_cpu
        for p in cpu_metrics.process_info:
            info = ProcessInfo(p.name)
            for c in p.core:
                info.addCoreLoad(get_core_load(c, cpuConfig))
            for t in p.threads:
                thread_info = ThreadInfo(t.name)
                for tc in t.core:
                    thread_info.addCoreLoad(get_core_load(tc, cpuConfig))
                info.threads.append(thread_info)
            systemLoad.processes.append(info)

        systemLoad.print(cpuConfig, numTopN, filterProcesses, filterThreads)
    finally:
        # release the trace processor instance even if the analysis fails
        tp.close()


def main():
    """Command line entry point; returns the process exit status."""
    args = init_arguments()

    # parse config; argparse opened the file for us, so close it once parsed
    with args.config_file:
        cpuConfig = parse_config(args.config_file)
    cpuSettings = cpuConfig.configs.get(args.cpusettings)
    if cpuSettings is None:
        print("Cannot find cpusettings {}".format(args.cpusettings))
        return 1

    threadsPerProcess = None
    if args.thread_names is not None:
        threadsPerProcess = {}
        for threadName in args.thread_names:
            names = threadName.split(':')
            if len(names) != 2:
                print(" Skipping {}: invalid format".format(threadName))
                continue
            process, thread = names
            threadsPerProcess.setdefault(process, []).append(thread)
        if not threadsPerProcess:
            threadsPerProcess = None

    run_analysis(args.trace_file[0],
                 cpuConfig,
                 cpuSettings,
                 args.number_of_top_processes,
                 args.process_names,
                 threadsPerProcess)
    return 0


if __name__ == '__main__':
    sys.exit(main())