#!/usr/bin/env python2
# Copyright 2012 Google Inc. All Rights Reserved.
"""Print tables comparing several textual perf reports.

Each report file is split into sections at every '# Events:' marker. For
every section the event count and the per-function sample counts are parsed,
and tables comparing those numbers across all given reports are printed.

Usage: <script> [-n NUM_SYMBOLS] [-c] report_file [report_file ...]
"""

from __future__ import print_function

__author__ = 'asharif@google.com (Ahmad Sharif)'

import argparse
import re
import sys

import misc
import tabulator

# Reserved keys that GetPerfDictFromReport stores next to the function names.
ROWS_TO_SHOW = 'Rows_to_show_in_the_perf_table'
TOTAL_EVENTS = 'Total_events_of_this_profile'


def GetPerfDictFromReport(report_file):
    """Parse a perf report file into a nested dictionary.

    Args:
      report_file: Path of the perf report to parse.

    Returns:
      A dict mapping each section name to a dict that maps every function
      name in that section to its count. Each inner dict also carries
      TOTAL_EVENTS (sum of all function counts) and ROWS_TO_SHOW (number of
      functions whose percentage is greater than 1).
    """
    output = {}
    perf_report = PerfReport(report_file)
    for section_name, section in perf_report.sections.items():
        section_dict = {ROWS_TO_SHOW: 0, TOTAL_EVENTS: 0}
        for function in section.functions:
            section_dict['%s' % (function.name)] = function.count
            section_dict[TOTAL_EVENTS] += function.count
            if function.percent > 1:
                section_dict[ROWS_TO_SHOW] += 1
        output[section_name] = section_dict
    return output


def _SortDictionaryByValue(d):
    """Return the keys of d ordered by descending value.

    Values that misc.IsFloat accepts are compared as floats; all other values
    are compared as they are.

    Args:
      d: The dictionary whose keys should be ordered.

    Returns:
      A list of the keys of d, largest value first.
    """

    def GetFloat(x):
        if misc.IsFloat(x):
            return float(x)
        return x

    # items() instead of iteritems() so this runs on Python 2 and Python 3.
    ordered = sorted(d.items(), key=lambda item: GetFloat(item[1]))
    # Sort ascending and then reverse (rather than reverse=True) to keep the
    # historical ordering of entries with equal values.
    ordered.reverse()
    return [key for key, _ in ordered]


class Tabulator(object):
    """Prints groups of dictionaries as tables, one column per dictionary."""

    def __init__(self, all_dicts):
        """Store the data to print.

        Args:
          all_dicts: A list of dictionary lists; each inner list becomes one
            table.
        """
        self._all_dicts = all_dicts

    def PrintTable(self):
        """Print one table for every list of dictionaries."""
        for dicts in self._all_dicts:
            self.PrintTableHelper(dicts)

    def PrintTableHelper(self, dicts):
        """Transform a list of dicts into a table and print it.

        Every key found in any dict becomes a row; rows are ordered by the
        largest value seen for that key, descending. Column i holds the value
        from dicts[i], or the string '0' when the key is missing there.

        Args:
          dicts: The dictionaries to show side by side.
        """
        # Largest value seen per key; used only to order the rows.
        fields = {}
        for d in dicts:
            for key, value in d.items():
                if key in fields:
                    fields[key] = max(fields[key], value)
                else:
                    fields[key] = value

        table = [['name'] + list(range(len(dicts)))]
        for key in _SortDictionaryByValue(fields):
            table.append([key] + [d.get(key, '0') for d in dicts])

        print(tabulator.GetSimpleTable(table))


class Function(object):
    """One function row of a report section: name, count and percentage."""

    def __init__(self):
        self.count = 0
        self.name = ''
        self.percent = 0


class Section(object):
    """One '# Events:' section of a perf report.

    Attributes are filled in by parsing the raw section text:
      name: Text following the event count on the 'Events:' line.
      count: Event count converted with misc.UnitToNumber.
      functions: List of Function objects, in file order.
    """

    def __init__(self, contents):
        """Parse the given section text.

        Args:
          contents: Raw text of the section.
        """
        self.name = ''
        # Defaults so the attributes exist even when no 'Events:' line is
        # found and parsing stops early.
        self.count = 0
        self.functions = []
        self.raw_contents = contents
        self._ParseSection()

    def _ParseSection(self):
        """Extract the event name, event count and function rows."""
        matches = re.findall(r'Events: (\w+)\s+(.*)', self.raw_contents)
        assert len(matches) <= 1, 'More than one event found in 1 section'
        if not matches:
            return
        count_text, self.name = matches[0]
        self.count = misc.UnitToNumber(count_text)

        self.functions = []
        for line in self.raw_contents.splitlines():
            # Function rows are the non-blank, non-comment lines that contain
            # a percentage.
            if not line.strip() or '%' not in line or line.startswith('#'):
                continue
            # Row layout: <percent>% <count> <name...>
            fields = [f for f in line.split(' ') if f]
            function = Function()
            function.percent = float(fields[0].strip('%'))
            function.count = int(fields[1])
            function.name = ' '.join(fields[2:])
            self.functions.append(function)


class PerfReport(object):
    """A perf report file, parsed into header metadata and sections.

    Attributes:
      perf_file: Path the report was read from.
      sections: Dict mapping section name to Section.
      metadata: Dict of 'key: value' pairs found in the header.
    """

    def __init__(self, perf_file):
        """Read and parse the report.

        Args:
          perf_file: Path of the perf report file.
        """
        self.perf_file = perf_file
        self._ReadFile()
        self.sections = {}
        self.metadata = {}
        self._section_contents = []
        self._section_header = ''
        self._SplitSections()
        self._ParseSections()
        self._ParseSectionHeader()

    def _ParseSectionHeader(self):
        """Parse a header of a perf report file."""
        # The "captured on" field is inaccurate - this actually refers to when
        # the report was generated, not when the data was captured.
        for line in self._section_header.splitlines():
            # Drop the first two characters of the line (presumably the '# '
            # comment prefix).
            line = line[2:]
            if ':' in line:
                key, val = line.strip().split(':', 1)
                self.metadata[key.strip()] = val.strip()

    def _ReadFile(self):
        """Read the whole report into memory."""
        # Context manager so the file handle is closed deterministically.
        with open(self.perf_file) as perf_file:
            self._perf_contents = perf_file.read()

    def _ParseSections(self):
        """Build a Section for every chunk found by _SplitSections."""
        self.event_counts = {}
        self.sections = {}
        for section_content in self._section_contents:
            section = Section(section_content)
            section.name = self._GetHumanReadableName(section.name)
            self.sections[section.name] = section

    # TODO(asharif): Do this better.
    def _GetHumanReadableName(self, section_name):
        """Translate a 'raw' section name using the report header.

        Names that do not contain 'raw' are returned unchanged. Otherwise the
        last space-separated token of the name is searched for in the header,
        and the sixth space-separated token of the first usable matching line
        is returned.

        Args:
          section_name: Section name as parsed from the 'Events:' line.

        Returns:
          The translated name, or section_name itself when no translation is
          found.
        """
        if 'raw' not in section_name:
            return section_name
        raw_number = section_name.strip().split(' ')[-1]
        for line in self._section_header.splitlines():
            if raw_number not in line:
                continue
            fields = line.strip().split(' ')
            if len(fields) > 5:
                return fields[5]
        # No usable header line: keep the raw name rather than returning
        # None, which would make every such section collide on one key.
        return section_name

    def _SplitSections(self):
        """Split the report into a header and one chunk per '# Events:'."""
        contents = self._perf_contents
        indices = [m.start() for m in re.finditer('# Events:', contents)]
        # The end offset closes the last section. With no marker at all it
        # is the only entry, so the whole file becomes the header.
        indices.append(len(contents))
        self._section_contents = [
            contents[start:end]
            for start, end in zip(indices, indices[1:])
        ]
        self._section_header = contents[0:indices[0]]


class PerfDiffer(object):
    """Compares sections and top functions across several PerfReports."""

    def __init__(self, reports, num_symbols, common_only):
        """Store the diff configuration.

        Args:
          reports: List of PerfReport objects to compare.
          num_symbols: Number of leading functions taken from each section.
          common_only: If true, also emit scaled counts for functions that
            appear in every report having the section.
        """
        self._reports = reports
        self._num_symbols = num_symbols
        self._common_only = common_only
        self._common_function_names = {}

    def DoDiff(self):
        """Build and print the file, summary and per-section tables."""
        section_names = self._FindAllSections()

        filename_dicts = []
        summary_dicts = []
        for report in self._reports:
            filename_dicts.append({'file': report.perf_file})
            summary = {}
            for section_name in section_names:
                if section_name in report.sections:
                    summary[section_name] = report.sections[section_name].count
            summary_dicts.append(summary)

        all_dicts = [filename_dicts, summary_dicts]

        for section_name in section_names:
            # A set gives constant-time membership tests in the loop below.
            top_names = set(
                self._GetTopFunctions(section_name, self._num_symbols))
            self._FindCommonFunctions(section_name)
            common_names = self._common_function_names[section_name]

            dicts = []
            for report in self._reports:
                d = {}
                if section_name in report.sections:
                    section = report.sections[section_name]

                    # Get a common scaling factor for this report.
                    common_scaling_factor = self._GetCommonScalingFactor(
                        section)

                    for function in section.functions:
                        if function.name not in top_names:
                            continue
                        key = '%s %s' % (section.name, function.name)
                        d[key] = function.count
                        # Compute a factor to scale the function count by in
                        # common_only mode.
                        if self._common_only and function.name in common_names:
                            d[key + ' scaled'] = (
                                common_scaling_factor * function.count)
                dicts.append(d)

            all_dicts.append(dicts)

        mytabulator = Tabulator(all_dicts)
        mytabulator.PrintTable()

    def _FindAllSections(self):
        """Return all section names, ordered by largest event count first."""
        sections = {}
        for report in self._reports:
            for section in report.sections.values():
                if section.name in sections:
                    sections[section.name] = max(sections[section.name],
                                                 section.count)
                else:
                    sections[section.name] = section.count
        return _SortDictionaryByValue(sections)

    def _GetCommonScalingFactor(self, section):
        """Return 100 divided by the total count of the common functions.

        Requires _FindCommonFunctions to have run for this section's name.

        Args:
          section: The Section to compute the factor for.

        Returns:
          The scaling factor as a float, or 0.0 when the common functions
          have a total count of zero (e.g. nothing is common).
        """
        common_names = self._common_function_names[section.name]
        unique_count = self._GetCount(section, lambda x: x in common_names)
        if not unique_count:
            # Avoid ZeroDivisionError; with a zero total every scaled value
            # would be zero anyway.
            return 0.0
        return 100.0 / unique_count

    def _GetCount(self, section, filter_fun=None):
        """Sum the counts of the section's functions.

        Args:
          section: The Section whose functions are summed.
          filter_fun: Optional predicate on the function name; when given,
            only functions it accepts are counted.

        Returns:
          The total count as an int.
        """
        return sum(
            int(function.count)
            for function in section.functions
            if not filter_fun or filter_fun(function.name))

    def _FindCommonFunctions(self, section_name):
        """Record the function names present in every report's section.

        Only reports that contain the section take part. The result is
        stored in self._common_function_names[section_name].

        Args:
          section_name: Name of the section to inspect.
        """
        name_sets = [
            set(f.name for f in report.sections[section_name].functions)
            for report in self._reports
            if section_name in report.sections
        ]
        # set.intersection(*sets) replaces the Python 2 only builtin reduce;
        # the guard covers the case where no report has the section.
        self._common_function_names[section_name] = (
            set.intersection(*name_sets) if name_sets else set())

    def _GetTopFunctions(self, section_name, num_functions):
        """Collect the leading functions of a section across all reports.

        Args:
          section_name: Name of the section to inspect.
          num_functions: How many leading functions to take per report.

        Returns:
          A list of function names ordered by their largest count,
          descending.
        """
        all_functions = {}
        for report in self._reports:
            if section_name not in report.sections:
                continue
            section = report.sections[section_name]
            for f in section.functions[:num_functions]:
                if f.name in all_functions:
                    all_functions[f.name] = max(all_functions[f.name], f.count)
                else:
                    all_functions[f.name] = f.count
        # FIXME(asharif): Don't really need to sort these...
        return _SortDictionaryByValue(all_functions)

    def _GetFunctionsDict(self, section, function_names):
        """Map name to count for the section's functions in function_names."""
        return {
            function.name: function.count
            for function in section.functions
            if function.name in function_names
        }


def Main(argv):
    """Parse the command line, load the reports and print the diff.

    Args:
      argv: Full argument vector, including the program name at index 0.

    Returns:
      0 on success.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-n',
                        '--num_symbols',
                        dest='num_symbols',
                        default='5',
                        help='The number of symbols to show.')
    parser.add_argument('-c',
                        '--common_only',
                        dest='common_only',
                        action='store_true',
                        default=False,
                        help='Diff common symbols only.')

    options, args = parser.parse_known_args(argv)

    # args[0] is the program name, since the full argv was parsed; the
    # remaining entries are the report files.
    reports = [PerfReport(report_path) for report_path in args[1:]]
    differ = PerfDiffer(reports, int(options.num_symbols), options.common_only)
    differ.DoDiff()

    return 0


if __name__ == '__main__':
    sys.exit(Main(sys.argv))