• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python3
2
3# Copyright (C) 2022 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17"""Tool to analyze CPU performance from perfetto trace
18This tool assumes that core clocks are fixed.
19It will not give accurate results if clock frequencies change.
20Requires the perfetto package: $ pip install perfetto
21"""
22
23import argparse
24import sys
25
26from perfetto.trace_processor import TraceProcessor
27
28from config import get_script_dir as get_script_dir
29from config import parse_config as parse_config
30from config import add_line_with_indentation
31
# Get total idle time and active time from each core.
# Reads the `sched` table and returns one row per cpu (aliased `core`),
# ordered by cpu. The `dur` of rows with utid = 0 is summed into idleTime;
# the `dur` of every other row is summed into activeTime.
# NOTE(review): utid 0 is presumably the idle (swapper) thread and `dur` is
# presumably in nanoseconds -- confirm against the perfetto schema.
QUERY_SCHED_CORE_SUM = """SELECT
  cpu AS core,
  SUM (CASE
        WHEN utid = 0 THEN 0
        ELSE dur
  END) AS activeTime,
  SUM (CASE
        WHEN utid = 0 THEN dur
        ELSE 0
  END) AS idleTime
FROM sched
GROUP BY cpu
ORDER BY cpu"""
46
class CoreLoad:
  """Cycle count attributed to a single core.

  Attributes:
    coreId: identifier of the core.
    totalCycles: cycles spent on that core.
  """

  def __init__(self, coreId, totalCycles):
    self.coreId, self.totalCycles = coreId, totalCycles
51
class CPUExecutionInfo:
  """Named collection of per-core loads.

  Attributes:
    name: label of the entity the loads belong to.
    perCoreLoads: dict mapping a core id to the load object registered for
      it. Registering a second load for the same core replaces the first.
  """

  def __init__(self, name):
    self.name = name
    self.perCoreLoads = {} # key: core, value :CoreLoad

  def addCoreLoad(self, load):
    """Register `load` under its own `coreId`."""
    self.perCoreLoads[load.coreId] = load

  def getCoreCycles(self, coreId):
    """Return the cycles recorded for `coreId`, or 0 if none were recorded."""
    load = self.perCoreLoads.get(coreId)
    return 0 if load is None else load.totalCycles

  def getTotalCycles(self):
    """Return the sum of `totalCycles` over every registered core."""
    return sum(load.totalCycles for load in self.perCoreLoads.values())
72
class ThreadInfo(CPUExecutionInfo):
  """Per-core loads of one thread, plus a report printer."""

  def print(self, totalCpuCycles, perCoreTotalCycles, loadPercentile):
    """Print this thread's share of CPU, overall and per core.

    Args:
      totalCpuCycles: cycle budget of the whole system; divisor for the
        "total" percentage.
      perCoreTotalCycles: mapping of core id to that core's cycle budget;
        divisor for each per-core percentage.
      loadPercentile: overall system load in percent; the "active"
        percentage is the total percentage rescaled by it.

    Every line goes through add_line_with_indentation with an indentation
    of 2 and the collected pieces are printed joined without a separator.
    """
    indent = 2
    lines = []

    def emit(text):
      add_line_with_indentation(lines, text, indent)

    overallPct = float(self.getTotalCycles()) / totalCpuCycles * 100.0
    activePct = overallPct / loadPercentile * 100.0
    emit("{}: total: {:.3f}% active: {:.3f}%".format(self.name, overallPct, activePct))
    emit(50 * "-")

    for coreId in sorted(self.perCoreLoads):
      cycles = self.perCoreLoads[coreId].totalCycles
      corePct = float(cycles) / perCoreTotalCycles[coreId] * 100.0
      label = "Core {}".format(coreId)
      value = "{:.3f}%".format(corePct)
      emit("{:<10} {:<15}".format(label, value))

    print("".join(lines))
92
class ProcessInfo(CPUExecutionInfo):
  """Per-core loads of one process together with its threads.

  Attributes:
    threads: list of ThreadInfo objects belonging to this process.
  """

  def __init__(self, name):
    super().__init__(name)
    self.threads = [] # ThreadInfo

  def get_filtered_threads(self, threadNames):
    """Return the threads whose name contains any of `threadNames`.

    Args:
      threadNames: iterable of substrings to look for in each thread name.

    Returns:
      A new list, in the order of self.threads. An empty `threadNames`
      matches nothing and yields an empty list.
    """
    return [t for t in self.threads
            if any(filterName in t.name for filterName in threadNames)]

  def print(self, totalCpuCycles, perCoreTotalCycles, loadPercentile, showThreads=False):
    """Print this process's share of CPU, overall and per core.

    Args:
      totalCpuCycles: cycle budget of the whole system; divisor for the
        "total" percentage.
      perCoreTotalCycles: mapping of core id to that core's cycle budget;
        divisor for each per-core percentage.
      loadPercentile: overall system load in percent; the "active"
        percentage is the total percentage rescaled by it.
      showThreads: when True, also print every thread, busiest first.
        This sorts self.threads in place.
    """
    msgs = []
    totalCpuLoad = float(self.getTotalCycles()) / totalCpuCycles * 100.0
    activeCpuLoad = totalCpuLoad / loadPercentile * 100.0
    msgs.append("{}: total: {:.3f}% active: {:.3f}%".format(
        self.name, totalCpuLoad, activeCpuLoad))
    msgs.append("\n" + 50 * "-")
    for c in sorted(self.perCoreLoads):
      l = self.perCoreLoads[c]
      coreLoad = float(l.totalCycles) / perCoreTotalCycles[c] * 100.0
      msgs.append("\n{:<10} {:<15}".format("Core {}".format(c), "{:.3f}%".format(coreLoad)))

    print(''.join(msgs))

    if showThreads:
      self.threads.sort(reverse = True, key = lambda p : p.getTotalCycles())
      for t in self.threads:
        t.print(totalCpuCycles, perCoreTotalCycles, loadPercentile)

    # Blank separator after each process section.
    print('\n')
125
126
class TotalCoreLoad:
  """Active and idle time of one core and the load derived from them.

  Attributes:
    coreId: identifier of the core.
    activeTime: time the core spent running work.
    idleTime: time the core spent idle.
    loadPercentile: activeTime as a percentage of activeTime + idleTime,
      or 0.0 when no time at all was recorded for the core.
  """

  def __init__(self, coreId, activeTime, idleTime):
    self.coreId = coreId
    self.activeTime = activeTime
    self.idleTime = idleTime
    totalTime = idleTime + activeTime
    # A core with no recorded time (for example an online core in a trace
    # without scheduling data) has no load; avoid dividing by zero.
    if totalTime:
      self.loadPercentile = float(activeTime) / totalTime * 100.0
    else:
      self.loadPercentile = 0.0
133
class SystemLoad:
  """Collects per-core time measurements and per-process loads, and prints
  the CPU load report.

  Attributes:
    totalLoads: list of TotalCoreLoad, appended in ascending core id order
      by addTimeMeasurements.
    totalLoad: initialized to 0.0; not updated by this class.
    processes: list of ProcessInfo, filled in by the caller.
  """

  def __init__(self):
    self.totalLoads = [] # TotalCoreLoad
    self.totalLoad = 0.0
    self.processes = [] # ProcessInfo

  def addTimeMeasurements(self, coreData, allCores):
    """Record the active/idle time of every core.

    Args:
      coreData: iterable of rows exposing `core`, `activeTime` and
        `idleTime`.
      allCores: iterable of core ids that should appear in the report.
        A core missing from `coreData` is recorded as idle for the longest
        total time seen on any measured core.
    """
    coreLoads = {} # k: core, v: TotalCoreLoad
    maxTotalTime = 0
    for entry in coreData:
      coreId = entry.core
      activeTime = entry.activeTime
      idleTime = entry.idleTime
      maxTotalTime = max(maxTotalTime, activeTime + idleTime)
      coreLoads[coreId] = TotalCoreLoad(coreId, activeTime, idleTime)
    for c in allCores:
      if c not in coreLoads:
        # this core was not used at all. So add it with idle only
        coreLoads[c] = TotalCoreLoad(c, 0, maxTotalTime)
    self.totalLoads.extend(coreLoads[c] for c in sorted(coreLoads))

  def get_filtered_processes(self, process_names):
    """Group processes by the filter names they match.

    Args:
      process_names: iterable of substrings to look for in process names.

    Returns:
      Dict mapping each filter name that matched at least one process to
      the list of matching processes. Names without a match are omitted.
    """
    processPerName = {}
    for name in process_names:
      processes = [p for p in self.processes if name in p.name]
      if processes:
        processPerName[name] = processes
    return processPerName

  def print(self, cpuConfig, numTopN, filterProcesses, filterThreads):
    """Print the full report: per-core load, optional filtered threads and
    processes, and the busiest processes.

    Sorts self.processes in place, busiest first.

    Args:
      cpuConfig: object whose `coreMaxFreqKHz` maps a core id to that
        core's maximum frequency in kHz.
      numTopN: number of processes listed in the "Top processes" section.
      filterProcesses: iterable of process-name substrings, or None to skip
        the "Filtered processes" section.
      filterThreads: dict mapping a process-name substring to a list of
        thread-name substrings, or None to skip the "Filtered threads"
        section.
    """
    print("\nTime based CPU load\n" + 30 * "=")
    loadXClkSum = 0.0
    perCoreCpuCycles = {}
    totalCpuCycles = 0
    maxCpuGHz = 0.0
    print("{:<10} {:<15} {:<15} {:<15}\n{}".format(
        "CPU", "CPU Load %", "CPU Usage", "Max CPU Freq.", 60 * "-"))
    for l in self.totalLoads:
      coreMaxFreqGHz = float(cpuConfig.coreMaxFreqKHz[l.coreId]) / 1e6
      coreIdStr = "Core {}".format(l.coreId)
      loadPercentileStr = "{:.3f}%".format(l.loadPercentile)
      loadUsageStr = "{:.3f} GHz".format(l.loadPercentile * coreMaxFreqGHz / 100)
      coreMaxFreqStr = "{:.3f} GHz".format(coreMaxFreqGHz)
      print("{:<10} {:<15} {:<15} {:<15}".format(
          coreIdStr, loadPercentileStr, loadUsageStr, coreMaxFreqStr))
      maxCpuGHz += coreMaxFreqGHz
      loadXClkSum += l.loadPercentile * coreMaxFreqGHz
      # time on the core multiplied by its max clock gives its cycle budget
      perCoreCpuCycles[l.coreId] = (l.activeTime + l.idleTime) * coreMaxFreqGHz
      totalCpuCycles += perCoreCpuCycles[l.coreId]
    # Frequency-weighted average load. With no cores recorded there is no
    # capacity to divide by, so report 0% instead of raising.
    loadPercentile = float(loadXClkSum) / maxCpuGHz if maxCpuGHz > 0 else 0.0
    print("\nTotal Load: {:.3f}%, {:.2f} GHz with system max {:.2f} GHz".format(
        loadPercentile, loadPercentile * maxCpuGHz / 100.0, maxCpuGHz))

    self.processes.sort(reverse = True, key = lambda p : p.getTotalCycles())
    if filterThreads is not None:
      print("\nFiltered threads\n" + 30 * "=")
      processPerName = self.get_filtered_processes(filterThreads.keys())
      if not processPerName:
        print("No process found matching filters.")
      for name in processPerName:
        for p in processPerName[name]:
          threads = p.get_filtered_threads(filterThreads[name])
          print("\n{}\n".format(p.name) + 30 * "-")
          for t in threads:
            t.print(totalCpuCycles, perCoreCpuCycles, loadPercentile)


    if filterProcesses is not None:
      print("\nFiltered processes\n" + 30 * "=")
      processPerName = self.get_filtered_processes(filterProcesses)
      if not processPerName:
        print("No process found matching filters.")
      # Flatten the per-filter lists. A process matching several filters
      # appears in several lists; report it only once.
      processes = []
      seen = set()
      for matched in processPerName.values():
        for p in matched:
          if id(p) not in seen:
            seen.add(id(p))
            processes.append(p)
      processes.sort(reverse = True, key = lambda p : p.getTotalCycles())
      for p in processes:
        p.print(totalCpuCycles, perCoreCpuCycles, loadPercentile, showThreads=True)

    print("\nTop processes\n" + 30 * "=")
    for p in self.processes[:numTopN]:
      p.print(totalCpuCycles, perCoreCpuCycles, loadPercentile)
220
def init_arguments():
  """Define the command line interface and parse the process arguments.

  Returns:
    The argparse namespace with config_file (an opened file), cpusettings,
    number_of_top_processes, process_names, thread_names and trace_file
    (a one-element list).
  """
  parser = argparse.ArgumentParser(description='Analyze CPU perf.')

  defaultConfigPath = get_script_dir() + '/pixel6.config'
  parser.add_argument(
      '-f', '--configfile',
      dest='config_file',
      default=defaultConfigPath,
      type=argparse.FileType('r'),
      help='CPU config file')
  parser.add_argument(
      '-c', '--cpusettings',
      dest='cpusettings',
      action='store',
      default='default',
      help='CPU Settings to apply')
  parser.add_argument(
      '-n', '--number_of_top_processes',
      dest='number_of_top_processes',
      action='store',
      type=int,
      default=5,
      help='Number of processes to show in performance report')
  parser.add_argument(
      '-p', '--process-name',
      dest='process_names',
      action='append',
      help='Name of process to filter')
  parser.add_argument(
      '-t', '--thread-name',
      dest='thread_names',
      action='append',
      help='Name of thread to filter. Format: <process-name>:<thread-name>')
  parser.add_argument(
      'trace_file',
      action='store',
      nargs=1,
      help='Perfetto trace file to analyze')

  args = parser.parse_args()
  return args
239
def get_core_load(coreData, cpuConfig):
  """Convert one per-core metrics entry into a CoreLoad.

  The frequency is the core's configured maximum from
  cpuConfig.coreMaxFreqKHz, replaced by the entry's `avg_freq_khz` when that
  field is set. Cycles are frequency (kHz) * runtime (ns) / 1e6.

  Args:
    coreData: entry exposing `id` and `metrics` (with `runtime_ns` and an
      optional `avg_freq_khz`).
    cpuConfig: object whose `coreMaxFreqKHz` is indexed by core id.

  Returns:
    CoreLoad for the core with the computed cycle count.
  """
  coreId = coreData.id
  freqKHz = cpuConfig.coreMaxFreqKHz[coreId]
  metrics = coreData.metrics
  if metrics.HasField('avg_freq_khz'):
    freqKHz = metrics.avg_freq_khz
  # kHz * ns / 1e6 == Hz * s, i.e. a cycle count
  return CoreLoad(coreId, freqKHz * metrics.runtime_ns / 1000000)
246
def run_analysis(
    traceFile,
    cpuConfig,
    cpuSettings,
    numTopN=5,
    filterProcesses=None,
    filterThreads=None
):
  """Load a perfetto trace, compute CPU loads and print the report.

  Args:
    traceFile: path of the perfetto trace to analyze.
    cpuConfig: parsed CPU configuration; provides per-core max frequencies.
    cpuSettings: selected CPU settings; its `onlines` lists the cores to
      include in the report even if the trace never used them.
    numTopN: number of processes shown in the "Top processes" section.
    filterProcesses: process-name substrings to report, or None.
    filterThreads: dict of process-name substring to list of thread-name
      substrings to report, or None.
  """
  tp = TraceProcessor(file_path=traceFile)
  systemLoad = SystemLoad()
  # Everything read from the trace is copied into systemLoad inside the try
  # block, so the trace processor can be released before printing, and is
  # released even when a query or metric fails.
  try:
    # get idle and active times per each cores
    core_times = tp.query(QUERY_SCHED_CORE_SUM)
    systemLoad.addTimeMeasurements(core_times, cpuSettings.onlines)

    cpu_metrics = tp.metric(['android_cpu']).android_cpu
    for p in cpu_metrics.process_info:
      info = ProcessInfo(p.name)
      for c in p.core:
        info.addCoreLoad(get_core_load(c, cpuConfig))
      for t in p.threads:
        thread_info = ThreadInfo(t.name)
        for tc in t.core:
          thread_info.addCoreLoad(get_core_load(tc, cpuConfig))
        info.threads.append(thread_info)
      systemLoad.processes.append(info)
  finally:
    tp.close()

  systemLoad.print(cpuConfig, numTopN, filterProcesses, filterThreads)
277
def main():
  """Script entry point: parse arguments and config, then run the analysis.

  Prints a message and returns without analyzing when the requested CPU
  settings name is not present in the config. Thread filters must have the
  form <process-name>:<thread-name>; malformed ones are reported and
  skipped.
  """
  args = init_arguments()

  # parse config
  cpuConfig = parse_config(args.config_file)
  cpuSettings = cpuConfig.configs.get(args.cpusettings)
  if cpuSettings is None:
    print("Cannot find cpusettings {}".format(args.cpusettings))
    return

  # Group the requested thread filters by process filter. Stays None when
  # no usable filter was given.
  threadsPerProcess = None
  if args.thread_names is not None:
    grouped = {}
    for spec in args.thread_names:
      parts = spec.split(':')
      if len(parts) != 2:
        print(" Skipping {}: invalid format".format(spec))
        continue
      grouped.setdefault(parts[0], []).append(parts[1])
    if grouped:
      threadsPerProcess = grouped

  run_analysis(args.trace_file[0],
               cpuConfig,
               cpuSettings,
               args.number_of_top_processes,
               args.process_names,
               threadsPerProcess)
309
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
312