• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
3# Copyright (C) 2022 The Android Open Source Project
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
9#      http://www.apache.org/licenses/LICENSE-2.0
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
17"""Tool to analyze CPU performance from perfetto trace
This tool assumes that core clocks are fixed.
It will not give accurate results if clock frequencies change.
Should install perfetto: $ pip install perfetto
"""
23import argparse
24import sys
26from perfetto.trace_processor import TraceProcessor
28from config import get_script_dir as get_script_dir
29from config import parse_config as parse_config
30from config import add_line_with_indentation
# Get total idle time and active time from each core.
# Rows of the sched table with utid = 0 are summed as idle time; all other
# rows are summed as active time. One result row per cpu, ordered by cpu,
# with columns: core, activeTime, idleTime.
QUERY_SCHED_CORE_SUM = """SELECT
  cpu AS core,
  SUM(CASE
        WHEN utid = 0 THEN 0
        ELSE dur
  END) AS activeTime,
  SUM(CASE
        WHEN utid = 0 THEN dur
        ELSE 0
  END) AS idleTime
FROM sched
GROUP BY cpu
ORDER BY cpu"""
class CoreLoad:
  """Cycle count recorded for one CPU core."""

  def __init__(self, coreId, totalCycles):
    # Plain value holder: both fields are stored exactly as passed in.
    self.coreId, self.totalCycles = coreId, totalCycles
class CPUExecutionInfo:
  """A named entity together with the load it put on each core."""

  def __init__(self, name):
    self.name = name
    self.perCoreLoads = {}  # key: core id, value: CoreLoad

  def addCoreLoad(self, load):
    """Store load under its coreId, replacing any earlier entry for that core."""
    self.perCoreLoads[load.coreId] = load

  def getCoreCycles(self, coreId):
    """Return the totalCycles recorded for coreId, or 0 when none is recorded."""
    load = self.perCoreLoads.get(coreId)
    return 0 if load is None else load.totalCycles

  def getTotalCycles(self):
    """Return the sum of totalCycles over every recorded core (0 when empty)."""
    return sum(load.totalCycles for load in self.perCoreLoads.values())
class ThreadInfo(CPUExecutionInfo):
  """Execution info for one thread, printable as an indented report."""

  def print(self, totalCpuCycles, perCoreTotalCycles, loadPercentile):
    """Print this thread's share of CPU cycles, overall and per core.

    Args:
      totalCpuCycles: divisor applied to this thread's total cycles.
      perCoreTotalCycles: mapping from core id to the divisor for that core.
      loadPercentile: percentage used to rescale the total share into the
        "active" share.
    """
    indent = 2
    lines = []
    totalShare = float(self.getTotalCycles()) / totalCpuCycles * 100.0
    activeShare = totalShare / loadPercentile * 100.0
    header = "{}: total: {:.3f}% active: {:.3f}%".format(self.name, totalShare, activeShare)
    add_line_with_indentation(lines, header, indent)
    add_line_with_indentation(lines, 50 * "-", indent)
    # One row per core this thread has a load entry for, in ascending core id order.
    for coreId in sorted(self.perCoreLoads):
      cycles = self.perCoreLoads[coreId].totalCycles
      percentStr = "{:.3f}%".format(float(cycles) / perCoreTotalCycles[coreId] * 100.0)
      coreStr = "Core {}".format(coreId)
      add_line_with_indentation(lines, "{:<10} {:<15}".format(coreStr, percentStr), indent)

    print("".join(lines))
class ProcessInfo(CPUExecutionInfo):
  """Execution info for one process plus the threads collected for it."""

  def __init__(self, name):
    super().__init__(name)
    self.threads = []  # ThreadInfo

  def get_filtered_threads(self, threadNames):
    """Return the threads whose name contains any of threadNames as a substring.

    Args:
      threadNames: iterable of substrings to look for in each thread name.

    Returns:
      A new list, in the order of self.threads. Empty threadNames matches
      nothing instead of raising ValueError.
    """
    wanted = tuple(threadNames)  # materialize once; it is scanned per thread
    return [t for t in self.threads if any(w in t.name for w in wanted)]

  def print(self, totalCpuCycles, perCoreTotalCycles, loadPercentile, showThreads=False):
    """Print this process's share of CPU cycles, overall and per core.

    Args:
      totalCpuCycles: divisor applied to this process's total cycles.
      perCoreTotalCycles: mapping from core id to the divisor for that core.
      loadPercentile: percentage used to rescale the total share into the
        "active" share.
      showThreads: when True, also print every thread, highest cycle count
        first. This sorts self.threads in place.
    """
    totalCpuLoad = float(self.getTotalCycles()) / totalCpuCycles * 100.0
    activeCpuLoad = totalCpuLoad / loadPercentile * 100.0
    msgs = [
        "{}: total: {:.3f}% active: {:.3f}%".format(self.name, totalCpuLoad, activeCpuLoad),
        "\n" + 50 * "-",
    ]
    for coreId in sorted(self.perCoreLoads):
      load = self.perCoreLoads[coreId]
      coreLoad = float(load.totalCycles) / perCoreTotalCycles[coreId] * 100.0
      msgs.append("\n{:<10} {:<15}".format("Core {}".format(coreId),
                                           "{:.3f}%".format(coreLoad)))

    print(''.join(msgs))

    if showThreads:
      self.threads.sort(reverse=True, key=lambda t: t.getTotalCycles())
      for t in self.threads:
        t.print(totalCpuCycles, perCoreTotalCycles, loadPercentile)

    print('\n')
class TotalCoreLoad:
  """Active and idle time of one core and the resulting load percentage."""

  def __init__(self, coreId, activeTime, idleTime):
    """Store the times and derive loadPercentile = active / (active + idle) * 100.

    When active and idle time add up to zero, loadPercentile is 0.0 rather
    than raising ZeroDivisionError.
    """
    self.coreId = coreId
    self.activeTime = activeTime
    self.idleTime = idleTime
    totalTime = idleTime + activeTime
    self.loadPercentile = float(activeTime) / totalTime * 100.0 if totalTime else 0.0
class SystemLoad:
  """Per-core time measurements plus per-process loads, with a text report."""

  def __init__(self):
    self.totalLoads = []  # TotalCoreLoad
    self.totalLoad = 0.0
    self.processes = []  # ProcessInfo

  def addTimeMeasurements(self, coreData, allCores):
    """Append one TotalCoreLoad per core to self.totalLoads, sorted by core id.

    Args:
      coreData: iterable of entries exposing core, activeTime and idleTime.
      allCores: iterable of core ids; any id missing from coreData is added
        as fully idle for the longest total time seen in coreData.
    """
    coreLoads = {}  # k: core, v: TotalCoreLoad
    maxTotalTime = 0
    for entry in coreData:
      maxTotalTime = max(maxTotalTime, entry.activeTime + entry.idleTime)
      coreLoads[entry.core] = TotalCoreLoad(entry.core, entry.activeTime, entry.idleTime)
    for c in allCores:
      if c not in coreLoads:
        # this core was not used at all. So add it with idle only
        coreLoads[c] = TotalCoreLoad(c, 0, maxTotalTime)
    self.totalLoads.extend(coreLoads[c] for c in sorted(coreLoads))

  def get_filtered_processes(self, process_names):
    """Group processes by the filter name their name contains.

    Args:
      process_names: iterable of substrings to look for in process names.

    Returns:
      Dict mapping each filter name that matched at least one process to the
      list of matching processes. Names without a match are left out.
    """
    processPerName = {}
    for name in process_names:
      matches = [p for p in self.processes if name in p.name]
      if matches:
        processPerName[name] = matches
    return processPerName

  def print(self, cpuConfig, numTopN, filterProcesses, filterThreads):
    """Print the per-core load table and the process/thread reports.

    Sorts self.processes in place, highest total cycles first.

    Args:
      cpuConfig: object whose coreMaxFreqKHz can be indexed by core id.
      numTopN: number of entries of the sorted process list to print under
        "Top processes".
      filterProcesses: iterable of process-name substrings, or None to skip
        the "Filtered processes" section.
      filterThreads: dict mapping a process-name substring to a list of
        thread-name substrings, or None to skip the "Filtered threads" section.
    """
    print("\nTime based CPU load\n" + 30 * "=")
    loadXClkSum = 0.0
    perCoreCpuCycles = {}
    totalCpuCycles = 0
    maxCpuGHz = 0.0
    print("{:<10} {:<15} {:<15} {:<15}\n{}".\
          format("CPU", "CPU Load %", "CPU Usage", "Max CPU Freq.", 60 * "-"))
    for load in self.totalLoads:
      coreMaxFreqGHz = float(cpuConfig.coreMaxFreqKHz[load.coreId]) / 1e6
      coreIdStr = "Core {}".format(load.coreId)
      loadPercentileStr = "{:.3f}%".format(load.loadPercentile)
      loadUsageStr = "{:.3f} GHz".format(load.loadPercentile * coreMaxFreqGHz / 100)
      coreMaxFreqStr = "{:.3f} GHz".format(coreMaxFreqGHz)
      print("{:<10} {:<15} {:<15} {:<15}".\
            format(coreIdStr, loadPercentileStr, loadUsageStr, coreMaxFreqStr))
      maxCpuGHz += coreMaxFreqGHz
      loadXClkSum += load.loadPercentile * coreMaxFreqGHz
      perCoreCpuCycles[load.coreId] = (load.activeTime + load.idleTime) * coreMaxFreqGHz
      totalCpuCycles += perCoreCpuCycles[load.coreId]
    # Frequency-weighted average of the per-core loads; 0 when there is no
    # core (or no frequency) to average over, instead of dividing by zero.
    loadPercentile = float(loadXClkSum) / maxCpuGHz if maxCpuGHz else 0.0
    print("\nTotal Load: {:.3f}%, {:.2f} GHz with system max {:.2f} GHz".\
          format(loadPercentile, loadPercentile * maxCpuGHz / 100.0, maxCpuGHz))

    self.processes.sort(reverse=True, key=lambda p: p.getTotalCycles())
    if filterThreads is not None:
      print("\nFiltered threads\n" + 30 * "=")
      processPerName = self.get_filtered_processes(filterThreads.keys())
      if not processPerName:
        print("No process found matching filters.")
      for name, matches in processPerName.items():
        for p in matches:
          threads = p.get_filtered_threads(filterThreads[name])
          print("\n{}\n".format(p.name) + 30 * "-")
          for t in threads:
            t.print(totalCpuCycles, perCoreCpuCycles, loadPercentile)

    if filterProcesses is not None:
      print("\nFiltered processes\n" + 30 * "=")
      processPerName = self.get_filtered_processes(filterProcesses)
      if not processPerName:
        print("No process found matching filters.")
      # Flatten the per-filter lists. A process matching several filters is
      # kept once so its report is not printed repeatedly.
      seen = set()
      processes = []
      for matches in processPerName.values():
        for p in matches:
          if id(p) not in seen:
            seen.add(id(p))
            processes.append(p)
      processes.sort(reverse=True, key=lambda p: p.getTotalCycles())
      for p in processes:
        p.print(totalCpuCycles, perCoreCpuCycles, loadPercentile, showThreads=True)

    print("\nTop processes\n" + 30 * "=")
    for p in self.processes[:numTopN]:
      p.print(totalCpuCycles, perCoreCpuCycles, loadPercentile)
def init_arguments():
  """Declare the command-line options and return the parsed arguments.

  Returns:
    The argparse namespace with config_file (opened for reading),
    cpusettings, number_of_top_processes, process_names, thread_names
    (both None when not given) and trace_file (a one-element list).
  """
  defaultConfigPath = get_script_dir() + '/pixel6.config'
  argParser = argparse.ArgumentParser(description='Analyze CPU perf.')
  argParser.add_argument(
      '-f', '--configfile',
      dest='config_file',
      type=argparse.FileType('r'),
      default=defaultConfigPath,
      help='CPU config file')
  argParser.add_argument(
      '-c', '--cpusettings',
      dest='cpusettings',
      default='default',
      help='CPU Settings to apply')
  argParser.add_argument(
      '-n', '--number_of_top_processes',
      dest='number_of_top_processes',
      type=int,
      default=5,
      help='Number of processes to show in performance report')
  # -p and -t may be repeated; each occurrence is appended to a list.
  argParser.add_argument(
      '-p', '--process-name',
      dest='process_names',
      action='append',
      help='Name of process to filter')
  argParser.add_argument(
      '-t', '--thread-name',
      dest='thread_names',
      action='append',
      help='Name of thread to filter. Format: <process-name>:<thread-name>')
  argParser.add_argument(
      'trace_file',
      nargs=1,
      help='Perfetto trace file to analyze')
  return argParser.parse_args()
def get_core_load(coreData, cpuConfig):
  """Convert one per-core metrics entry into a CoreLoad.

  Uses the entry's avg_freq_khz when that field is set and otherwise the
  configured coreMaxFreqKHz for the core.
  """
  metrics = coreData.metrics
  configuredFreqKHz = cpuConfig.coreMaxFreqKHz[coreData.id]
  if metrics.HasField('avg_freq_khz'):
    freqKHz = metrics.avg_freq_khz
  else:
    freqKHz = configuredFreqKHz
  # kHz * ns / 1e6 -> unit should be Hz * s
  return CoreLoad(coreData.id, freqKHz * metrics.runtime_ns / 1000000)
def run_analysis(
    traceFile,
    cpuConfig,
    cpuSettings,
    numTopN=5,
    filterProcesses=None,
    filterThreads=None
):
  """Load a perfetto trace, collect core and process loads, and print the report.

  Args:
    traceFile: path of the trace, passed to TraceProcessor as file_path.
    cpuConfig: config object, used for core frequencies.
    cpuSettings: settings object whose onlines lists the cores to report.
    numTopN: number of top processes to print.
    filterProcesses: process-name filters, or None.
    filterThreads: dict of process-name filter to thread-name filters, or None.
  """
  systemLoad = SystemLoad()
  tp = TraceProcessor(file_path=traceFile)
  try:
    # get idle and active times per each cores
    core_times = tp.query(QUERY_SCHED_CORE_SUM)
    systemLoad.addTimeMeasurements(core_times, cpuSettings.onlines)

    cpu_metrics = tp.metric(['android_cpu']).android_cpu
    for p in cpu_metrics.process_info:
      info = ProcessInfo(p.name)
      for c in p.core:
        info.addCoreLoad(get_core_load(c, cpuConfig))
      for t in p.threads:
        thread_info = ThreadInfo(t.name)
        for tc in t.core:
          thread_info.addCoreLoad(get_core_load(tc, cpuConfig))
        info.threads.append(thread_info)
      systemLoad.processes.append(info)
  finally:
    # Always release the trace processor, even when a query or metric fails.
    tp.close()

  systemLoad.print(cpuConfig, numTopN, filterProcesses, filterThreads)
def main():
  """Parse the command line, resolve the CPU settings and run the analysis."""
  args = init_arguments()

  # parse config
  cpuConfig = parse_config(args.config_file)
  settingsName = args.cpusettings
  cpuSettings = cpuConfig.configs.get(settingsName)
  if cpuSettings is None:
    print("Cannot find cpusettings {}".format(settingsName))
    return

  # Group the "<process-name>:<thread-name>" filters by process name.
  grouped = {}
  for spec in args.thread_names or ():
    parts = spec.split(':')
    if len(parts) != 2:
      print(" Skipping {}: invalid format".format(spec))
      continue
    processName, threadName = parts
    grouped.setdefault(processName, []).append(threadName)
  # No usable thread filter (none given, or all malformed) is passed on as None.
  threadsPerProcess = grouped or None

  run_analysis(args.trace_file[0],
               cpuConfig,
               cpuSettings,
               args.number_of_top_processes,
               args.process_names,
               threadsPerProcess)
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
  main()