1# -*- coding: utf-8 -*- 2# Copyright 2018 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6"""Python module to draw heat map for Chrome 7 8heat map is a histogram used to analyze the locality of function layout. 9 10This module is used by heat_map.py. HeatmapGenerator is a class to 11generate data for drawing heat maps (the actual drawing of heat maps is 12performed by another script perf-to-inst-page.sh). It can also analyze 13the symbol names in hot pages. 14""" 15 16from __future__ import division, print_function 17 18import bisect 19import collections 20import os 21import pipes 22import subprocess 23 24from cros_utils import command_executer 25 26HugepageRange = collections.namedtuple('HugepageRange', ['start', 'end']) 27 28 29class MMap(object): 30 """Class to store mmap information in perf report. 31 32 We assume ASLR is disabled, so MMap for all Chrome is assumed to be 33 the same. This class deals with the case hugepage creates several 34 mmaps for Chrome but should be merged together. In these case, we 35 assume the first MMAP is not affected by the bug and use the MMAP. 36 """ 37 38 def __init__(self, addr, size, offset): 39 self.start_address = addr 40 self.size = size 41 self.offset = offset 42 43 def __str__(self): 44 return '(%x, %x, %x)' % (self.start_address, self.size, self.offset) 45 46 def merge(self, mmap): 47 # This function should not be needed, since we should only have 48 # one MMAP on Chrome of each process. This function only deals with 49 # images that is affected by http://crbug.com/931465. 50 51 # This function is only checking a few conditions to make sure 52 # the bug is within our expectation. 53 54 if self.start_address == mmap.start_address: 55 assert self.size >= mmap.size, \ 56 'Original MMAP size(%x) is smaller than the forked process(%x).' % ( 57 self.size, mmap.size) 58 # The case that the MMAP is forked from the previous process 59 # No need to do anything, OR 60 # The case where hugepage causes a small Chrome mmap. 61 # In this case, we use the prior MMAP for the whole Chrome 62 return 63 64 assert self.start_address < mmap.start_address, \ 65 'Original MMAP starting address(%x) is larger than the forked' \ 66 'process(%x).' % (self.start_address, mmap.start_address) 67 68 assert self.start_address + self.size >= mmap.start_address + mmap.size, \ 69 'MMAP of the forked process exceeds the end of original MMAP.' 70 71 72class HeatmapGenerator(object): 73 """Class to generate heat map with a perf report, containing mmaps and 74 75 samples. This class contains two interfaces with other modules: 76 draw() and analyze(). 77 78 draw() draws a heatmap with the sample information given in the perf report 79 analyze() prints out the symbol names in hottest pages with the given 80 chrome binary 81 """ 82 83 def __init__(self, 84 perf_report, 85 page_size, 86 hugepage, 87 title, 88 log_level='verbose'): 89 self.perf_report = perf_report 90 # Pick 1G as a relatively large number. All addresses less than it will 91 # be recorded. The actual heatmap will show up to a boundary of the 92 # largest address in text segment. 93 self.max_addr = 1024 * 1024 * 1024 94 self.ce = command_executer.GetCommandExecuter(log_level=log_level) 95 self.dir = os.path.dirname(os.path.realpath(__file__)) 96 with open(perf_report, 'r', encoding='utf-8') as f: 97 self.perf_report_contents = f.readlines() 98 # Write histogram results to a text file, in order to use gnu plot to draw 99 self.hist_temp_output = open('out.txt', 'w', encoding='utf-8') 100 self.processes = {} 101 self.deleted_processes = {} 102 self.count = 0 103 if hugepage: 104 self.hugepage = HugepageRange(start=hugepage[0], end=hugepage[1]) 105 else: 106 self.hugepage = None 107 self.title = title 108 self.symbol_addresses = [] 109 self.symbol_names = [] 110 self.page_size = page_size 111 112 def _parse_perf_sample(self, line): 113 # In a perf report, generated with -D, a PERF_RECORD_SAMPLE command should 114 # look like this: TODO: some arguments are unknown 115 # 116 # cpuid cycle unknown [unknown]: PERF_RECORD_SAMPLE(IP, 0x2): pid/tid: 117 # 0xaddr period: period addr: addr 118 # ... thread: threadname:tid 119 # ...... dso: process 120 # 121 # This is an example: 122 # 1 136712833349 0x6a558 [0x30]: PERF_RECORD_SAMPLE(IP, 0x2): 5227/5227: 123 # 0x55555683b810 period: 372151 addr: 0 124 # ... thread: chrome:5227 125 # ...... dso: /opt/google/chrome/chrome 126 # 127 # For this function, the 7th argument (args[6]) after spltting with spaces 128 # is pid/tid. We use the combination of the two as the pid. 129 # Also, we add an assertion here to check the tid in the 7th argument( 130 # args[6]) and the 15th argument(arg[14]) are the same 131 # 132 # The function returns the ((pid,tid), address) pair if the sampling 133 # is on Chrome. Otherwise, return (None, None) pair. 134 135 if 'thread: chrome' not in line or \ 136 'dso: /opt/google/chrome/chrome' not in line: 137 return None, None 138 args = line.split(' ') 139 pid_raw = args[6].split('/') 140 assert pid_raw[1][:-1] == args[14].split(':')[1][:-1], \ 141 'TID in %s of sample is not the same: %s/%s' % ( 142 line[:-1], pid_raw[1][:-1], args[14].split(':')[1][:-1]) 143 key = (int(pid_raw[0]), int(pid_raw[1][:-1])) 144 address = int(args[7], base=16) 145 return key, address 146 147 def _parse_perf_record(self, line): 148 # In a perf report, generated with -D, a PERF_RECORD_MMAP2 command should 149 # look like this: TODO: some arguments are unknown 150 # 151 # cpuid cycle unknown [unknown]: PERF_RECORD_MMAP2 pid/tid: 152 # [0xaddr(0xlength) @ pageoffset maj:min ino ino_generation]: 153 # permission process 154 # 155 # This is an example. 156 # 2 136690556823 0xa6898 [0x80]: PERF_RECORD_MMAP2 5227/5227: 157 # [0x555556496000(0x8d1b000) @ 0xf42000 b3:03 92844 1892514370]: 158 # r-xp /opt/google/chrome/chrome 159 # 160 # For this function, the 6th argument (args[5]) after spltting with spaces 161 # is pid/tid. We use the combination of the two as the pid. 162 # The 7th argument (args[6]) is the [0xaddr(0xlength). We can peel the 163 # string to get the address and size of the mmap. 164 # The 9th argument (args[8]) is the page offset. 165 # The function returns the ((pid,tid), mmap) pair if the mmap is for Chrome 166 # is on Chrome. Otherwise, return (None, None) pair. 167 168 if 'chrome/chrome' not in line: 169 return None, None 170 args = line.split(' ') 171 pid_raw = args[5].split('/') 172 assert pid_raw[0] == pid_raw[1][:-1], \ 173 'PID in %s of mmap is not the same: %s/%s' % ( 174 line[:-1], pid_raw[0], pid_raw[1]) 175 pid = (int(pid_raw[0]), int(pid_raw[1][:-1])) 176 address_raw = args[6].split('(') 177 start_address = int(address_raw[0][1:], base=16) 178 size = int(address_raw[1][:-1], base=16) 179 offset = int(args[8], base=16) 180 # Return an mmap object instead of only starting address, 181 # in case there are many mmaps for the sample PID 182 return pid, MMap(start_address, size, offset) 183 184 def _parse_pair_event(self, arg): 185 # This function is called by the _parse_* functions that has a pattern of 186 # pids like: (pid:tid):(pid:tid), i.e. 187 # PERF_RECORD_FORK and PERF_RECORD_COMM 188 _, remain = arg.split('(', 1) 189 pid1, remain = remain.split(':', 1) 190 pid2, remain = remain.split(')', 1) 191 _, remain = remain.split('(', 1) 192 pid3, remain = remain.split(':', 1) 193 pid4, remain = remain.split(')', 1) 194 return (int(pid1), int(pid2)), (int(pid3), int(pid4)) 195 196 def _process_perf_record(self, line): 197 # This function calls _parse_perf_record() to get information from 198 # PERF_RECORD_MMAP2. It records the mmap object for each pid (a pair of 199 # pid,tid), into a dictionary. 200 pid, mmap = self._parse_perf_record(line) 201 if pid is None: 202 # PID = None meaning the mmap is not for chrome 203 return 204 if pid in self.processes: 205 # This should never happen for a correct profiling result, as we 206 # should only have one MMAP for Chrome for each process. 207 # If it happens, see http://crbug.com/931465 208 self.processes[pid].merge(mmap) 209 else: 210 self.processes[pid] = mmap 211 212 def _process_perf_fork(self, line): 213 # In a perf report, generated with -D, a PERF_RECORD_FORK command should 214 # look like this: 215 # 216 # cpuid cycle unknown [unknown]: 217 # PERF_RECORD_FORK(pid_to:tid_to):(pid_from:tid_from) 218 # 219 # This is an example. 220 # 0 0 0x22a8 [0x38]: PERF_RECORD_FORK(1:1):(0:0) 221 # 222 # In this function, we need to peel the information of pid:tid pairs 223 # So we get the last argument and send it to function _parse_pair_event() 224 # for analysis. 225 # We use (pid, tid) as the pid. 226 args = line.split(' ') 227 pid_to, pid_from = self._parse_pair_event(args[-1]) 228 if pid_from in self.processes: 229 assert pid_to not in self.processes 230 self.processes[pid_to] = MMap(self.processes[pid_from].start_address, 231 self.processes[pid_from].size, 232 self.processes[pid_from].offset) 233 234 def _process_perf_exit(self, line): 235 # In a perf report, generated with -D, a PERF_RECORD_EXIT command should 236 # look like this: 237 # 238 # cpuid cycle unknown [unknown]: 239 # PERF_RECORD_EXIT(pid1:tid1):(pid2:tid2) 240 # 241 # This is an example. 242 # 1 136082505621 0x30810 [0x38]: PERF_RECORD_EXIT(3851:3851):(3851:3851) 243 # 244 # In this function, we need to peel the information of pid:tid pairs 245 # So we get the last argument and send it to function _parse_pair_event() 246 # for analysis. 247 # We use (pid, tid) as the pid. 248 args = line.split(' ') 249 pid_to, pid_from = self._parse_pair_event(args[-1]) 250 assert pid_to == pid_from, '(%d, %d) (%d, %d)' % (pid_to[0], pid_to[1], 251 pid_from[0], pid_from[1]) 252 if pid_to in self.processes: 253 # Don't delete the process yet 254 self.deleted_processes[pid_from] = self.processes[pid_from] 255 256 def _process_perf_sample(self, line): 257 # This function calls _parse_perf_sample() to get information from 258 # the perf report. 259 # It needs to check the starting address of allocated mmap from 260 # the dictionary (self.processes) to calculate the offset within 261 # the text section of the sampling. 262 # The offset is calculated into pages (4KB or 2MB) and writes into 263 # out.txt together with the total counts, which will be used to 264 # calculate histogram. 265 pid, addr = self._parse_perf_sample(line) 266 if pid is None: 267 return 268 269 assert pid in self.processes and pid not in self.deleted_processes, \ 270 'PID %d not found mmap and not forked from another process' 271 272 start_address = self.processes[pid].start_address 273 address = addr - start_address 274 assert address >= 0 and \ 275 'addresses accessed in PERF_RECORD_SAMPLE should be larger than' \ 276 ' the starting address of Chrome' 277 if address < self.max_addr: 278 self.count += 1 279 line = '%d/%d: %d %d' % (pid[0], pid[1], self.count, 280 address // self.page_size * self.page_size) 281 if self.hugepage: 282 if self.hugepage.start <= address < self.hugepage.end: 283 line += ' hugepage' 284 else: 285 line += ' smallpage' 286 print(line, file=self.hist_temp_output) 287 288 def _read_perf_report(self): 289 # Serve as main function to read perf report, generated by -D 290 lines = iter(self.perf_report_contents) 291 for line in lines: 292 if 'PERF_RECORD_MMAP' in line: 293 self._process_perf_record(line) 294 elif 'PERF_RECORD_FORK' in line: 295 self._process_perf_fork(line) 296 elif 'PERF_RECORD_EXIT' in line: 297 self._process_perf_exit(line) 298 elif 'PERF_RECORD_SAMPLE' in line: 299 # Perf sample is multi-line 300 self._process_perf_sample(line + next(lines) + next(lines)) 301 self.hist_temp_output.close() 302 303 def _draw_heat_map(self): 304 # Calls a script (perf-to-inst-page.sh) to calculate histogram 305 # of results written in out.txt and also generate pngs for 306 # heat maps. 307 heatmap_script = os.path.join(self.dir, 'perf-to-inst-page.sh') 308 if self.hugepage: 309 hp_arg = 'hugepage' 310 else: 311 hp_arg = 'none' 312 313 cmd = '{0} {1} {2}'.format(heatmap_script, pipes.quote(self.title), hp_arg) 314 retval = self.ce.RunCommand(cmd) 315 if retval: 316 raise RuntimeError('Failed to run script to generate heatmap') 317 318 def _restore_histogram(self): 319 # When hugepage is used, there are two files inst-histo-{hp,sp}.txt 320 # So we need to read in all the files. 321 names = [x for x in os.listdir('.') if 'inst-histo' in x and '.txt' in x] 322 hist = {} 323 for n in names: 324 with open(n, encoding='utf-8') as f: 325 for l in f.readlines(): 326 num, addr = l.strip().split(' ') 327 assert int(addr) not in hist 328 hist[int(addr)] = int(num) 329 return hist 330 331 def _read_symbols_from_binary(self, binary): 332 # FIXME: We are using nm to read symbol names from Chrome binary 333 # for now. Can we get perf to hand us symbol names, instead of 334 # using nm in the future? 335 # 336 # Get all the symbols (and their starting addresses) that fall into 337 # the page. Will be used to print out information of hot pages 338 # Each line shows the information of a symbol: 339 # [symbol value (0xaddr)] [symbol type] [symbol name] 340 # For some symbols, the [symbol name] field might be missing. 341 # e.g. 342 # 0000000001129da0 t Builtins_LdaNamedPropertyHandler 343 344 # Generate a list of symbols from nm tool and check each line 345 # to extract symbols names 346 text_section_start = 0 347 for l in subprocess.check_output(['nm', '-n', binary]).split('\n'): 348 args = l.strip().split(' ') 349 if len(args) < 3: 350 # No name field 351 continue 352 addr_raw, symbol_type, name = args 353 addr = int(addr_raw, base=16) 354 if 't' not in symbol_type and 'T' not in symbol_type: 355 # Filter out symbols not in text sections 356 continue 357 if not self.symbol_addresses: 358 # The first symbol in text sections 359 text_section_start = addr 360 self.symbol_addresses.append(0) 361 self.symbol_names.append(name) 362 else: 363 assert text_section_start != 0, \ 364 'The starting address of text section has not been found' 365 if addr == self.symbol_addresses[-1]: 366 # if the same address has multiple symbols, put them together 367 # and separate symbol names with '/' 368 self.symbol_names[-1] += '/' + name 369 else: 370 # The output of nm -n command is already sorted by address 371 # Insert to the end will result in a sorted array for bisect 372 self.symbol_addresses.append(addr - text_section_start) 373 self.symbol_names.append(name) 374 375 def _map_addr_to_symbol(self, addr): 376 # Find out the symbol name 377 assert self.symbol_addresses 378 index = bisect.bisect(self.symbol_addresses, addr) 379 assert 0 < index <= len(self.symbol_names), \ 380 'Failed to find an index (%d) in the list (len=%d)' % ( 381 index, len(self.symbol_names)) 382 return self.symbol_names[index - 1] 383 384 def _print_symbols_in_hot_pages(self, fp, pages_to_show): 385 # Print symbols in all the pages of interest 386 for page_num, sample_num in pages_to_show: 387 print( 388 '----------------------------------------------------------', file=fp) 389 print( 390 'Page Offset: %d MB, Count: %d' % (page_num // 1024 // 1024, 391 sample_num), 392 file=fp) 393 394 symbol_counts = collections.Counter() 395 # Read Sample File and find out the occurance of symbols in the page 396 lines = iter(self.perf_report_contents) 397 for line in lines: 398 if 'PERF_RECORD_SAMPLE' in line: 399 pid, addr = self._parse_perf_sample(line + next(lines) + next(lines)) 400 if pid is None: 401 # The sampling is not on Chrome 402 continue 403 if addr // self.page_size != ( 404 self.processes[pid].start_address + page_num) // self.page_size: 405 # Sampling not in the current page 406 continue 407 408 name = self._map_addr_to_symbol(addr - 409 self.processes[pid].start_address) 410 assert name, 'Failed to find symbol name of addr %x' % addr 411 symbol_counts[name] += 1 412 413 assert sum(symbol_counts.values()) == sample_num, \ 414 'Symbol name matching missing for some addresses: %d vs %d' % ( 415 sum(symbol_counts.values()), sample_num) 416 417 # Print out the symbol names sorted by the number of samples in 418 # the page 419 for name, count in sorted( 420 symbol_counts.items(), key=lambda kv: kv[1], reverse=True): 421 if count == 0: 422 break 423 print('> %s : %d' % (name, count), file=fp) 424 print('\n\n', file=fp) 425 426 def draw(self): 427 # First read perf report to process information and save histogram 428 # into a text file 429 self._read_perf_report() 430 # Then use gnu plot to draw heat map 431 self._draw_heat_map() 432 433 def analyze(self, binary, top_n): 434 # Read histogram from histo.txt 435 hist = self._restore_histogram() 436 # Sort the pages in histogram 437 sorted_hist = sorted(hist.items(), key=lambda value: value[1], reverse=True) 438 439 # Generate symbolizations 440 self._read_symbols_from_binary(binary) 441 442 # Write hottest pages 443 with open('addr2symbol.txt', 'w', encoding='utf-8') as fp: 444 if self.hugepage: 445 # Print hugepage region first 446 print( 447 'Hugepage top %d hot pages (%d MB - %d MB):' % 448 (top_n, self.hugepage.start // 1024 // 1024, 449 self.hugepage.end // 1024 // 1024), 450 file=fp) 451 pages_to_print = [(k, v) 452 for k, v in sorted_hist 453 if self.hugepage.start <= k < self.hugepage.end 454 ][:top_n] 455 self._print_symbols_in_hot_pages(fp, pages_to_print) 456 print('==========================================', file=fp) 457 print('Top %d hot pages landed outside of hugepage:' % top_n, file=fp) 458 # Then print outside pages 459 pages_to_print = [(k, v) 460 for k, v in sorted_hist 461 if k < self.hugepage.start or k >= self.hugepage.end 462 ][:top_n] 463 self._print_symbols_in_hot_pages(fp, pages_to_print) 464 else: 465 # Print top_n hottest pages. 466 pages_to_print = sorted_hist[:top_n] 467 self._print_symbols_in_hot_pages(fp, pages_to_print) 468