#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Parse textual perf reports and print them side by side.

A report file is split into a header (everything before the first
'# Events:' marker) and one section per '# Events:' marker. Every section
lists functions as lines of the form '<percent>% <count> <name>'.

PerfDiffer prints, for a list of reports, one table with the file names, one
with the per-section event counts and one table per section with the counts
of the top functions of each report.
"""

from __future__ import print_function

__author__ = 'asharif@google.com (Ahmad Sharif)'

import argparse
import functools
import re
import sys

from cros_utils import misc
from cros_utils import tabulator

ROWS_TO_SHOW = 'Rows_to_show_in_the_perf_table'
TOTAL_EVENTS = 'Total_events_of_this_profile'


def GetPerfDictFromReport(report_file):
    """Summarize every section of a perf report as plain dictionaries.

    Args:
        report_file: Path of the perf report file to parse.

    Returns:
        A dict keyed by section name. Every value maps the function names of
        that section to their counts and additionally holds two bookkeeping
        entries: ROWS_TO_SHOW (number of functions whose percentage is above
        1) and TOTAL_EVENTS (sum of all function counts of the section).
    """
    output = {}
    perf_report = PerfReport(report_file)
    for section_name, section in perf_report.sections.items():
        summary = output.setdefault(section_name, {})
        summary[ROWS_TO_SHOW] = 0
        summary[TOTAL_EVENTS] = 0
        for function in section.functions:
            summary['%s' % (function.name)] = function.count
            summary[TOTAL_EVENTS] += function.count
            if function.percent > 1:
                summary[ROWS_TO_SHOW] += 1
    return output


def _SortDictionaryByValue(d):
    """Return the keys of `d` ordered from the largest value to the smallest.

    Values accepted by misc.IsFloat are compared as floats; every other value
    is compared as-is.
    """

    def SortKey(item):
        value = item[1]
        return float(value) if misc.IsFloat(value) else value

    ordered = sorted(d.items(), key=SortKey)
    # Sort ascending and then reverse (instead of reverse=True) so that
    # entries with equal values keep the order this function always produced.
    ordered.reverse()
    return [key for key, _ in ordered]


class Tabulator(object):
    """Print groups of dictionaries as simple text tables."""

    def __init__(self, all_dicts):
        """Store the data to print.

        Args:
            all_dicts: A list of tables, each table being a list of dicts
                (one dict per column).
        """
        self._all_dicts = all_dicts

    def PrintTable(self):
        """Print one table for every list of dicts given to the constructor."""
        for dicts in self._all_dicts:
            self.PrintTableHelper(dicts)

    def PrintTableHelper(self, dicts):
        """Transform a list of dicts into a single table and print it.

        The table has a 'name' column followed by one column per dict,
        labelled with the index of the dict. The rows are the union of all
        keys, ordered by their largest value across the dicts (descending).
        A key that is missing from a dict is shown as '0'.

        Args:
            dicts: The list of dicts to print; one table column per dict.
        """
        # Largest value seen for every key; only used to order the rows.
        fields = {}
        for d in dicts:
            for key, value in d.items():
                if key in fields:
                    fields[key] = max(fields[key], value)
                else:
                    fields[key] = value

        header = ['name']
        header.extend(range(len(dicts)))
        table = [header]
        for key in _SortDictionaryByValue(fields):
            table.append([key] + [d.get(key, '0') for d in dicts])

        print(tabulator.GetSimpleTable(table))


class Function(object):
    """One function line of a report section."""

    def __init__(self):
        self.count = 0
        self.name = ''
        self.percent = 0


class Section(object):
    """One '# Events:' section of a perf report."""

    def __init__(self, contents):
        """Parse the raw text of a section.

        Args:
            contents: The text of the section, starting at its '# Events:'
                marker.
        """
        self.name = ''
        # Defaults for sections without an 'Events:' line, so that the
        # attributes read by the rest of this module always exist.
        self.count = 0
        self.functions = []
        self.raw_contents = contents
        self._ParseSection()

    def _ParseSection(self):
        """Fill in name, count and functions from the raw section text.

        Sections without an 'Events:' line are left with an empty name, a
        count of 0 and no functions.

        Raises:
            AssertionError: If the text contains more than one 'Events:'
                line.
        """
        matches = re.findall(r'Events: (\w+)\s+(.*)', self.raw_contents)
        assert len(matches) <= 1, 'More than one event found in 1 section'
        if not matches:
            return
        event_count, event_name = matches[0]
        self.name = event_name
        self.count = misc.UnitToNumber(event_count)

        self.functions = []
        for line in self.raw_contents.splitlines():
            # Function lines are the non-comment lines carrying a percentage.
            if not line.strip() or '%' not in line or line.startswith('#'):
                continue
            fields = [f for f in line.split(' ') if f]
            function = Function()
            function.percent = float(fields[0].strip('%'))
            function.count = int(fields[1])
            function.name = ' '.join(fields[2:])
            self.functions.append(function)


class PerfReport(object):
    """A parsed perf report file: header metadata plus event sections."""

    def __init__(self, perf_file):
        """Read and parse a report.

        Args:
            perf_file: Path of the perf report file.
        """
        self.perf_file = perf_file
        self.sections = {}
        self.metadata = {}
        self.event_counts = {}
        self._section_contents = []
        self._section_header = ''
        self._ReadFile()
        self._SplitSections()
        self._ParseSections()
        self._ParseSectionHeader()

    def _ParseSectionHeader(self):
        """Parse a header of a perf report file."""
        # The "captured on" field is inaccurate - this actually refers to when
        # the report was generated, not when the data was captured.
        for line in self._section_header.splitlines():
            # Drop the first two characters (the comment prefix) of the line.
            line = line[2:]
            if ':' in line:
                key, val = line.strip().split(':', 1)
                self.metadata[key.strip()] = val.strip()

    def _ReadFile(self):
        """Load the whole report into memory."""
        # A context manager closes the file even if reading fails.
        with open(self.perf_file) as report:
            self._perf_contents = report.read()

    def _ParseSections(self):
        """Build a Section for every chunk found by _SplitSections."""
        self.event_counts = {}
        self.sections = {}
        for section_content in self._section_contents:
            section = Section(section_content)
            section.name = self._GetHumanReadableName(section.name)
            self.sections[section.name] = section

    # TODO(asharif): Do this better.
    def _GetHumanReadableName(self, section_name):
        """Map a raw event name to the name used for it in the header.

        Args:
            section_name: The event name parsed from the section.

        Returns:
            `section_name` itself when it does not contain 'raw'. Otherwise
            the sixth space-separated field of the first header line that
            mentions the raw event number, or `section_name` when no header
            line provides one.
        """
        if 'raw' not in section_name:
            return section_name
        raw_number = section_name.strip().split(' ')[-1]
        for line in self._section_header.splitlines():
            if raw_number not in line:
                continue
            fields = line.strip().split(' ')
            if len(fields) > 5:
                return fields[5]
        # Keep the raw name instead of returning None, which would become
        # the section's name and its key in self.sections.
        return section_name

    def _SplitSections(self):
        """Split the report into its header and its '# Events:' sections."""
        indices = [
            m.start() for m in re.finditer('# Events:', self._perf_contents)
        ]
        # Sentinel: the last section ends at the end of the file. Without any
        # marker the whole file is treated as header.
        indices.append(len(self._perf_contents))
        self._section_contents = [
            self._perf_contents[start:end]
            for start, end in zip(indices, indices[1:])
        ]
        self._section_header = self._perf_contents[0:indices[0]]


class PerfDiffer(object):
    """Perf differ class."""

    def __init__(self, reports, num_symbols, common_only):
        """Set up a diff.

        Args:
            reports: The list of PerfReport objects to compare.
            num_symbols: How many leading functions of every section to show.
            common_only: Whether to add scaled counts for the functions that
                are present in every report having the section.
        """
        self._reports = reports
        self._num_symbols = num_symbols
        self._common_only = common_only
        self._common_function_names = {}

    def DoDiff(self):
        """Print the file, summary and per-section comparison tables."""
        section_names = self._FindAllSections()

        filename_dicts = [
            {'file': report.perf_file} for report in self._reports
        ]
        summary_dicts = [
            {
                name: report.sections[name].count
                for name in section_names
                if name in report.sections
            }
            for report in self._reports
        ]
        all_dicts = [filename_dicts, summary_dicts]

        for section_name in section_names:
            # Only membership is needed below, so use a set for the lookups.
            function_names = set(
                self._GetTopFunctions(section_name, self._num_symbols))
            self._FindCommonFunctions(section_name)
            all_dicts.append([
                self._GetSectionRows(report, section_name, function_names)
                for report in self._reports
            ])

        Tabulator(all_dicts).PrintTable()

    def _GetSectionRows(self, report, section_name, function_names):
        """Return the table column of one report for one section.

        Requires _FindCommonFunctions to have been called for the section.

        Args:
            report: The PerfReport to take the counts from.
            section_name: Name of the section to extract.
            function_names: Names of the functions to include.

        Returns:
            A dict mapping '<section> <function>' to the function's count,
            plus '<section> <function> scaled' entries for common functions
            in common_only mode. Empty if the report lacks the section.
        """
        if section_name not in report.sections:
            return {}
        section = report.sections[section_name]
        rows = {}
        if self._common_only:
            common_names = self._common_function_names[section.name]
            # Get a common scaling factor for this report.
            common_scaling_factor = self._GetCommonScalingFactor(section)

        for function in section.functions:
            if function.name not in function_names:
                continue
            key = '%s %s' % (section.name, function.name)
            rows[key] = function.count
            # Scale the function count by the common factor in common_only
            # mode.
            if self._common_only and function.name in common_names:
                rows[key + ' scaled'] = common_scaling_factor * function.count
        return rows

    def _FindAllSections(self):
        """Return all section names, ordered by their largest count."""
        sections = {}
        for report in self._reports:
            for section in report.sections.values():
                if section.name in sections:
                    sections[section.name] = max(sections[section.name],
                                                 section.count)
                else:
                    sections[section.name] = section.count
        return _SortDictionaryByValue(sections)

    def _GetCommonScalingFactor(self, section):
        """Return the factor scaling the common functions' total to 100.

        Returns 0.0 when the common functions have no samples at all; every
        count it would be multiplied with is 0 in that case.
        """
        common_names = self._common_function_names[section.name]
        unique_count = self._GetCount(section, lambda x: x in common_names)
        if not unique_count:
            return 0.0
        return 100.0 / unique_count

    def _GetCount(self, section, filter_fun=None):
        """Sum the counts of the functions whose name passes `filter_fun`.

        Args:
            section: The Section whose functions are counted.
            filter_fun: Optional predicate on the function name; every
                function is counted when it is omitted.
        """
        return sum(
            int(function.count)
            for function in section.functions
            if not filter_fun or filter_fun(function.name))

    def _FindCommonFunctions(self, section_name):
        """Record the function names shared by all reports with the section.

        The result is stored in self._common_function_names[section_name];
        it is an empty set when no report has the section.
        """
        name_sets = [
            {f.name for f in report.sections[section_name].functions}
            for report in self._reports
            if section_name in report.sections
        ]
        if name_sets:
            common = functools.reduce(set.intersection, name_sets)
        else:
            # reduce() without an initial value raises on an empty list.
            common = set()
        self._common_function_names[section_name] = common

    def _GetTopFunctions(self, section_name, num_functions):
        """Return the names of the leading functions of every report.

        Args:
            section_name: Name of the section to inspect.
            num_functions: How many functions to take from the start of the
                section of every report.

        Returns:
            The function names, ordered by their largest count (descending).
        """
        all_functions = {}
        for report in self._reports:
            if section_name not in report.sections:
                continue
            section = report.sections[section_name]
            for f in section.functions[:num_functions]:
                if f.name in all_functions:
                    all_functions[f.name] = max(all_functions[f.name],
                                                f.count)
                else:
                    all_functions[f.name] = f.count
        # FIXME(asharif): Don't really need to sort these...
        return _SortDictionaryByValue(all_functions)

    def _GetFunctionsDict(self, section, function_names):
        """Map the selected function names of `section` to their counts."""
        return {
            function.name: function.count
            for function in section.functions
            if function.name in function_names
        }


def Main(argv):
    """The entry of the main.

    Args:
        argv: The full command line, including the program name. Every
            argument after the program name that is not a known option is
            treated as the path of a perf report.

    Returns:
        0 on success.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-n',
        '--num_symbols',
        dest='num_symbols',
        type=int,
        default=5,
        help='The number of symbols to show.')
    parser.add_argument(
        '-c',
        '--common_only',
        dest='common_only',
        action='store_true',
        default=False,
        help='Diff common symbols only.')

    options, args = parser.parse_known_args(argv)

    # args[0] is the program name, which argparse hands back as an unknown
    # positional argument.
    reports = [PerfReport(report_path) for report_path in args[1:]]
    pd = PerfDiffer(reports, options.num_symbols, options.common_only)
    pd.DoDiff()

    return 0


if __name__ == '__main__':
    sys.exit(Main(sys.argv))