# SPDX-License-Identifier: Apache-2.0
#
# Copyright (C) 2015, ARM Limited and contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

""" Latency Analysis Module """

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab as pl
import re

from collections import namedtuple
from analysis_module import AnalysisModule
from devlib.utils.misc import memoized
from trappy.utils import listify

# Tuple representing all IDs data of a Task
TaskData = namedtuple('TaskData', ['pid', 'names', 'label'])

CDF = namedtuple('CDF', ['df', 'threshold', 'above', 'below'])


class LatencyAnalysis(AnalysisModule):
    """
    Support for plotting Latency Analysis data

    :param trace: input Trace object
    :type trace: :mod:`libs.utils.Trace`
    """

    def __init__(self, trace):
        super(LatencyAnalysis, self).__init__(trace)

###############################################################################
# DataFrame Getter Methods
###############################################################################

    @memoized
    def _dfg_latency_df(self, task):
        """
        DataFrame of task's wakeup/suspend events

        The returned DataFrame index is the time, in seconds, at which an
        event related to `task` happened.
        The DataFrame has these columns:
        - target_cpu: the CPU where the task has been scheduled
                      (reported only for wakeup events)
        - curr_state: the current task state:
            a letter which corresponds to the standard states reported by the
            prev_state field of a sched_switch event.
            The only exception is 'A', which is used to represent active
            tasks, i.e. tasks RUNNING on a CPU
        - next_state: the next state for the task
        - t_start: the time when the current state started; it matches Time
        - t_delta: the interval of time after which the task will switch to
                   the next_state

        :param task: the task to report wakeup latencies for
        :type task: int or str
        """

        if not self._trace.hasEvents('sched_wakeup'):
            self._log.warning('Events [sched_wakeup] not found, '
                              'cannot compute CPU active signal!')
            return None
        if not self._trace.hasEvents('sched_switch'):
            self._log.warning('Events [sched_switch] not found, '
                              'cannot compute CPU active signal!')
            return None

        # Get task data
        td = self._getTaskData(task)
        if not td:
            return None

        wk_df = self._dfg_trace_event('sched_wakeup')
        sw_df = self._dfg_trace_event('sched_switch')

        # Filter Task's WAKEUP events
        task_wakeup = wk_df[wk_df.pid == td.pid][['target_cpu', 'pid']]

        # Filter Task's START events
        task_events = (sw_df.prev_pid == td.pid) | (sw_df.next_pid == td.pid)
        task_switches_df = sw_df[task_events]\
            [['__cpu', 'prev_pid', 'next_pid', 'prev_state']]
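
        # The following steps merge the wakeup and switch events into a single
        # time-indexed DataFrame describing the task state machine. As a
        # purely illustrative (made-up) example, the final result is expected
        # to look like:
        #
        #   Time      target_cpu  __cpu curr_state next_state   t_start  t_delta
        #   1.000100         2.0    NaN          W          A  1.000100   0.0002
        #   1.000300         NaN    2.0          A          S  1.000300   0.0100
        #   1.010300         NaN    2.0          S          W  1.010300   0.5000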
        # Unset prev_state for switch_in events, i.e.
        # we don't care about the state of a task we are replacing
        task_switches_df.prev_state = task_switches_df.apply(
            lambda r: np.nan if r['prev_pid'] != td.pid
                             else self._taskState(r['prev_state']),
            axis=1)

        # Rename prev_state
        task_switches_df.rename(columns={'prev_state': 'curr_state'},
                                inplace=True)

        # Fill in Running state
        # We've just set curr_state (a.k.a. prev_state) to NaN where td.pid
        # was switching in, so set the state to 'A' ("active") in those places
        task_switches_df.curr_state = task_switches_df.curr_state.fillna(value='A')

        # Join Wakeup and SchedSwitch events
        task_latency_df = task_wakeup.join(task_switches_df, how='outer',
                                           lsuffix='_wkp', rsuffix='_slp')
        # Remove columns which are not required
        task_latency_df = task_latency_df[['target_cpu', '__cpu', 'curr_state']]
        # Set Wakeup state on each Wakeup event
        task_latency_df.curr_state = task_latency_df.curr_state.fillna(value='W')

        # Sanity check: all task states should be mapped to a char
        numbers = 0
        for value in task_switches_df.curr_state.unique():
            if type(value) is not str:
                self._log.warning('The [sched_switch] events contain "prev_state" value [%s]',
                                  value)
                numbers += 1
        if numbers:
            verb = 'is' if numbers == 1 else 'are'
            self._log.warning('  which %s not currently mapped into a task state.',
                              verb)
            self._log.warning('Check mappings in:')
            self._log.warning(' %s::%s _taskState()',
                              __file__, self.__class__.__name__)

        # Forward annotate task state
        task_latency_df['next_state'] = task_latency_df.curr_state.shift(-1)

        # Forward account for previous state duration
        task_latency_df['t_start'] = task_latency_df.index
        task_latency_df['t_delta'] = (
            task_latency_df['t_start'].shift(-1)
            - task_latency_df['t_start']
        )

        return task_latency_df


    # Select Wakeup latency
    def _dfg_latency_wakeup_df(self, task):
        """
        DataFrame of task's wakeup latencies

        The returned DataFrame index is the time, in seconds, at which `task`
        woke up.
        The DataFrame has just one column:
        - wakeup_latency: the time the task waited before getting a CPU

        :param task: the task to report wakeup latencies for
        :type task: int or str
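
        Example::

            # Hypothetical usage, assuming `trace` is a Trace object with
            # this module registered as `trace.analysis.latency` and 1234
            # is the PID of a task of interest:
            latency = trace.analysis.latency
            wkp_df = latency._dfg_latency_wakeup_df(1234)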
        """

        task_latency_df = self._dfg_latency_df(task)
        if task_latency_df is None:
            return None
        df = task_latency_df[
            (task_latency_df.curr_state == 'W') &
            (task_latency_df.next_state == 'A')][['t_delta']]
        df.rename(columns={'t_delta': 'wakeup_latency'}, inplace=True)
        return df

    # Select Preemption latency
    def _dfg_latency_preemption_df(self, task):
        """
        DataFrame of task's preemption latencies

        The returned DataFrame index is the time, in seconds, at which `task`
        has been preempted.
        The DataFrame has just one column:
        - preemption_latency: the time the task waited before getting a CPU
                              again

        :param task: the task to report preemption latencies for
        :type task: int or str
        """
        task_latency_df = self._dfg_latency_df(task)
        if task_latency_df is None:
            return None
        df = task_latency_df[
            (task_latency_df.curr_state.isin([0, 'R', 'R+'])) &
            (task_latency_df.next_state == 'A')][['t_delta']]
        df.rename(columns={'t_delta': 'preempt_latency'}, inplace=True)
        return df

    @memoized
    def _dfg_activations_df(self, task):
        """
        DataFrame of task's wakeup intervals

        The returned DataFrame index is the time, in seconds, at which `task`
        has woken up.
        The DataFrame has just one column:
        - activation_interval: the time since the previous wakeup event

        :param task: the task to report activation intervals for
        :type task: int or str
        """
        # Select all wakeup events
        wkp_df = self._dfg_latency_df(task)
        if wkp_df is None:
            return None
        wkp_df = wkp_df[wkp_df.curr_state == 'W'].copy()
        # Compute delta between successive wakeup events
        wkp_df['activation_interval'] = (
            wkp_df['t_start'].shift(-1) - wkp_df['t_start'])
        wkp_df['activation_interval'] = wkp_df['activation_interval'].shift(1)
        # Return the activation period each time the task wakes up
        wkp_df = wkp_df[['activation_interval']].shift(-1)
        return wkp_df

    @memoized
    def _dfg_runtimes_df(self, task):
        """
        DataFrame of task's runtime each time the task blocks

        The returned DataFrame index is the time, in seconds, at which `task`
        completed an activation (i.e. went to sleep or exited).
        The DataFrame has just one column:
        - running_time: the time the task spent RUNNING since its last wakeup

        :param task: the task to report runtimes for
        :type task: int or str
        """
        # Select all wakeup events
        run_df = self._dfg_latency_df(task)
        if run_df is None:
            return None

        # Filter function to add up RUNNING intervals of each activation
        def cr(row):
            if row['curr_state'] in ['S']:
                return cr.runtime
            if row['curr_state'] in ['W']:
                if cr.spurious_wkp:
                    cr.runtime += row['t_delta']
                    cr.spurious_wkp = False
                    return cr.runtime
                cr.runtime = 0
                return cr.runtime
            if row['curr_state'] != 'A':
                return cr.runtime
            if row['next_state'] in ['R', 'R+', 'S', 'x', 'D']:
                cr.runtime += row['t_delta']
                return cr.runtime
            # This is required to capture strange trace sequences where
            # a switch_in event is followed by a wakeup event.
            # This sequence is not expected, but we found it in some traces.
            # Possible reasons could be:
            # - misplaced sched_wakeup events
            # - trace buffer artifacts
            # TO BE BETTER investigated in kernel space.
            # For the time being, we account this interval as RUNNING time,
            # which is what kernelshark does.
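            # Concretely: an 'A' row whose next_state is 'W' means the task
            # was switched in but the next event seen for it is another
            # wakeup; the interval is credited to the current activation and
            # the wakeup is flagged as spurious, so it does not reset the
            # accumulated runtime.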
            if row['next_state'] in ['W']:
                cr.runtime += row['t_delta']
                cr.spurious_wkp = True
                return cr.runtime
            if row['next_state'] in ['n']:
                return cr.runtime
            self._log.warning("Unexpected next state: %s @ %f",
                              row['next_state'], row['t_start'])
            return 0
        # cr's static variables initialization
        cr.runtime = 0
        cr.spurious_wkp = False

        # Add up RUNNING intervals of each activation
        run_df['running_time'] = run_df.apply(cr, axis=1)
        # Return RUNTIME computed for each activation,
        # each time the task blocks or terminates
        run_df = run_df[run_df.next_state.isin(['S', 'x'])][['running_time']]
        return run_df


###############################################################################
# Plotting Methods
###############################################################################

    def plotLatency(self, task, kind='all', tag=None, threshold_ms=1, bins=64):
        """
        Generate a set of plots to report the WAKEUP and PREEMPT latencies the
        specified task has been subject to. A WAKEUP latency is the time from
        when a task becomes RUNNABLE till the first time it gets a CPU.
        A PREEMPT latency is the time from when a RUNNING task is suspended
        because the CPU is assigned to another task till when the task gets
        the CPU again.

        :param task: the task to report latencies for
        :type task: int or list(str)

        :param kind: the kind of latencies to report (WAKEUP and/or PREEMPT)
        :type kind: str

        :param tag: a string to add to the plot title
        :type tag: str

        :param threshold_ms: the minimum acceptable [ms] value to report
                             graphically in the generated plots
        :type threshold_ms: int or float

        :param bins: number of bins to be used for the latencies histogram
        :type bins: int

        :returns: a DataFrame with statistics on plotted latencies
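
        Example::

            # Hypothetical usage, assuming `trace` is a Trace object and
            # "mytask" is the name of one of the traced tasks:
            stats = trace.analysis.latency.plotLatency("mytask",
                                                       threshold_ms=2)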
        """

        if not self._trace.hasEvents('sched_switch'):
            self._log.warning('Event [sched_switch] not found, '
                              'plot DISABLED!')
            return
        if not self._trace.hasEvents('sched_wakeup'):
            self._log.warning('Event [sched_wakeup] not found, '
                              'plot DISABLED!')
            return

        # Get task data
        td = self._getTaskData(task)
        if not td:
            return None

        # Load wakeup latencies (if required)
        wkp_df = None
        if 'all' in kind or 'wakeup' in kind:
            wkp_df = self._dfg_latency_wakeup_df(td.pid)
            if wkp_df is not None:
                wkp_df.rename(columns={'wakeup_latency': 'latency'},
                              inplace=True)
                self._log.info('Found: %5d WAKEUP latencies', len(wkp_df))

        # Load preempt latencies (if required)
        prt_df = None
        if 'all' in kind or 'preempt' in kind:
            prt_df = self._dfg_latency_preemption_df(td.pid)
            if prt_df is not None:
                prt_df.rename(columns={'preempt_latency': 'latency'},
                              inplace=True)
                self._log.info('Found: %5d PREEMPT latencies', len(prt_df))

        if wkp_df is None and prt_df is None:
            self._log.warning('No Latency info for task [%s]', td.label)
            return

        # Join the two data frames (either one may be missing, depending on
        # the requested kind)
        if wkp_df is None:
            df = prt_df
        elif prt_df is None:
            df = wkp_df
        else:
            df = wkp_df.append(prt_df)
        ymax = 1.1 * df.latency.max()
        self._log.info('Total: %5d latency events', len(df))

        # Build the series for the CDF
        cdf = self._getCDF(df.latency, (threshold_ms / 1000.))
        self._log.info('%.1f %% samples below %d [ms] threshold',
                       100. * cdf.below, threshold_ms)

        # Setup plots
        gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1], width_ratios=[1, 1])
        plt.figure(figsize=(16, 8))

        plot_title = "[{}]: {} latencies".format(td.label, kind.upper())
        if tag:
            plot_title = "{} [{}]".format(plot_title, tag)
        plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)

        # Latency events duration over time
        axes = plt.subplot(gs[0, 0:2])
        axes.set_title(plot_title)
        try:
            wkp_df.rename(columns={'latency': 'wakeup'}, inplace=True)
            wkp_df.plot(style='b+', logy=True, ax=axes)
        except: pass
        try:
            prt_df.rename(columns={'latency': 'preempt'}, inplace=True)
            prt_df.plot(style='r+', logy=True, ax=axes)
        except: pass
        axes.axhline(threshold_ms / 1000., linestyle='--', color='g')
        self._trace.analysis.status.plotOverutilized(axes)
        axes.legend(loc='lower center', ncol=2)
        axes.set_xlim(self._trace.x_min, self._trace.x_max)

        # Cumulative distribution of latencies samples
        axes = plt.subplot(gs[1, 0])
        cdf.df.plot(ax=axes, legend=False, xlim=(0, None),
                    title='Latencies CDF ({:.1f}% within {} [ms] threshold)'\
                          .format(100. * cdf.below, threshold_ms))
        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5)
        axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')

        # Histogram of all latencies
        axes = plt.subplot(gs[1, 1])
        df.latency.plot(kind='hist', bins=bins, ax=axes,
                        xlim=(0, ymax), legend=False,
                        title='Latency histogram ({} bins, {} [ms] green threshold)'\
                              .format(bins, threshold_ms))
        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5)

        # Save generated plots into datadir
        task_name = re.sub('[\ :/]', '_', td.label)
        figname = '{}/{}task_latencies_{}_{}.png'\
                  .format(self._trace.plots_dir, self._trace.plots_prefix,
                          td.pid, task_name)
        pl.savefig(figname, bbox_inches='tight')

        # Return statistics
        stats_df = df.describe(percentiles=[0.95, 0.99])
        label = '{:.1f}%'.format(100. * cdf.below)
        stats = {label: cdf.threshold}
        return stats_df.append(pd.DataFrame(
            stats.values(), columns=['latency'], index=stats.keys()))


    def plotLatencyBands(self, task, axes=None):
        """
        Draw a plot that shows intervals of time when the execution of a
        RUNNABLE task has been delayed. The plot reports:
          WAKEUP     latencies as RED colored bands
          PREEMPTION latencies as BLUE colored bands

        The optional axes parameter allows to plot the signal on an existing
        graph.

        :param task: the task to report latencies for
        :type task: str

        :param axes: axes on which to plot the signal
        :type axes: :mod:`matplotlib.axes.Axes`
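
        Example::

            # Hypothetical usage, assuming `trace` is a Trace object,
            # "mytask" is the name of one of the traced tasks and `my_axes`
            # is an existing matplotlib Axes to draw on:
            trace.analysis.latency.plotLatencyBands("mytask", axes=my_axes)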
        """
        if not self._trace.hasEvents('sched_switch'):
            self._log.warning('Event [sched_switch] not found, '
                              'plot DISABLED!')
            return
        if not self._trace.hasEvents('sched_wakeup'):
            self._log.warning('Event [sched_wakeup] not found, '
                              'plot DISABLED!')
            return

        # Get task PID
        td = self._getTaskData(task)
        if not td:
            return None

        wkl_df = self._dfg_latency_wakeup_df(td.pid)
        prt_df = self._dfg_latency_preemption_df(td.pid)

        if wkl_df is None and prt_df is None:
            self._log.warning('No latency info for task [%s]', td.label)
            return

        # If no axes provided: generate a standalone plot
        if not axes:
            gs = gridspec.GridSpec(1, 1)
            plt.figure(figsize=(16, 2))
            axes = plt.subplot(gs[0, 0])
            axes.set_title('Latencies on [{}] '
                           '(red: WAKEUP, blue: PREEMPT)'\
                           .format(td.label))
            axes.set_xlim(self._trace.x_min, self._trace.x_max)
            axes.set_yticklabels([])
            axes.set_xlabel('Time [s]')
            axes.grid(True)

        # Draw WAKEUP latencies
        try:
            bands = [(t, wkl_df['wakeup_latency'][t]) for t in wkl_df.index]
            for (start, duration) in bands:
                end = start + duration
                axes.axvspan(start, end, facecolor='r', alpha=0.1)
            axes.set_xlim(self._trace.x_min, self._trace.x_max)
        except: pass

        # Draw PREEMPTION latencies
        try:
            bands = [(t, prt_df['preempt_latency'][t]) for t in prt_df.index]
            for (start, duration) in bands:
                end = start + duration
                axes.axvspan(start, end, facecolor='b', alpha=0.1)
            axes.set_xlim(self._trace.x_min, self._trace.x_max)
        except: pass

    def plotActivations(self, task, tag=None, threshold_ms=16, bins=64):
        """
        Plots "activation intervals" for the specified task

        An "activation interval" is the time elapsed between two consecutive
        wakeups of a task. A set of plots is generated to report:
        - Activation intervals at wakeup time: every time a task wakes up, a
          point is plotted to represent the time interval since the previous
          wakeup.
        - Activation intervals cumulative function: reports the cumulative
          function of the activation intervals.
        - Activation intervals histogram: reports a histogram of the
          activation intervals.

        All plots are parameterized based on the value of threshold_ms, which
        can be used to filter out activation intervals bigger than 2 times
        this value.
        Such a threshold is useful to filter outliers out of the plots, thus
        focusing the analysis on the most interesting periodicity.
        The number and percentage of discarded samples is reported in output.
        A default threshold of 16 [ms] is used, which is useful for example
        to analyze a 60Hz rendering pipeline.

        A PNG of the generated plots is saved in the same folder where the
        trace is.

        :param task: the task to report activation intervals for
        :type task: int or list(str)

        :param tag: a string to add to the plot title
        :type tag: str

        :param threshold_ms: the minimum acceptable [ms] value to report
                             graphically in the generated plots
        :type threshold_ms: int or float

        :param bins: number of bins to be used for the activation intervals
                     histogram
        :type bins: int

        :returns: a DataFrame with statistics on plotted activation intervals
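
        Example::

            # Hypothetical usage, assuming `trace` is a Trace object and
            # 1234 is the PID of a 60Hz rendering task:
            stats = trace.analysis.latency.plotActivations(1234,
                                                           threshold_ms=16)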
        """

        if not self._trace.hasEvents('sched_switch'):
            self._log.warning('Event [sched_switch] not found, '
                              'plot DISABLED!')
            return
        if not self._trace.hasEvents('sched_wakeup'):
            self._log.warning('Event [sched_wakeup] not found, '
                              'plot DISABLED!')
            return

        # Get task data
        td = self._getTaskData(task)
        if not td:
            return None

        # Load activation data
        wkp_df = self._dfg_activations_df(td.pid)
        if wkp_df is None:
            return None
        self._log.info('Found: %5d activations for [%s]',
                       len(wkp_df), td.label)

        # Disregard data above two times the specified threshold
        y_max = (2 * threshold_ms) / 1000.
        len_tot = len(wkp_df)
        wkp_df = wkp_df[wkp_df.activation_interval <= y_max]
        len_plt = len(wkp_df)
        if len_plt < len_tot:
            len_dif = len_tot - len_plt
            len_pct = 100. * len_dif / len_tot
            self._log.warning('Discarding {} activation intervals (above 2 x '
                              'threshold_ms, {:.1f}% of the overall activations)'\
                              .format(len_dif, len_pct))
        ymax = 1.1 * wkp_df.activation_interval.max()

        # Build the series for the CDF
        cdf = self._getCDF(wkp_df.activation_interval, (threshold_ms / 1000.))
        self._log.info('%.1f %% samples below %d [ms] threshold',
                       100. * cdf.below, threshold_ms)

        # Setup plots
        gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1], width_ratios=[1, 1])
        plt.figure(figsize=(16, 8))

        plot_title = "[{}]: activation intervals (@ wakeup time)".format(td.label)
        if tag:
            plot_title = "{} [{}]".format(plot_title, tag)
        plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)

        # Activation intervals over time
        axes = plt.subplot(gs[0, 0:2])
        axes.set_title(plot_title)
        wkp_df.plot(style='g+', logy=False, ax=axes)

        axes.axhline(threshold_ms / 1000., linestyle='--', color='g')
        self._trace.analysis.status.plotOverutilized(axes)
        axes.legend(loc='lower center', ncol=2)
        axes.set_xlim(self._trace.x_min, self._trace.x_max)

        # Cumulative distribution of all activation intervals
        axes = plt.subplot(gs[1, 0])
        cdf.df.plot(ax=axes, legend=False, xlim=(0, None),
                    title='Activations CDF ({:.1f}% within {} [ms] threshold)'\
                          .format(100. * cdf.below, threshold_ms))
        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5)
        axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')

        # Histogram of all activation intervals
        axes = plt.subplot(gs[1, 1])
        wkp_df.plot(kind='hist', bins=bins, ax=axes,
                    xlim=(0, ymax), legend=False,
                    title='Activation intervals histogram ({} bins, {} [ms] green threshold)'\
                          .format(bins, threshold_ms))
        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5)

        # Save generated plots into datadir
        task_name = re.sub('[\ :/]', '_', td.label)
        figname = '{}/{}task_activations_{}_{}.png'\
                  .format(self._trace.plots_dir, self._trace.plots_prefix,
                          td.pid, task_name)
        pl.savefig(figname, bbox_inches='tight')

        # Return statistics
        stats_df = wkp_df.describe(percentiles=[0.95, 0.99])
        label = '{:.1f}%'.format(100. * cdf.below)
        stats = {label: cdf.threshold}
        return stats_df.append(pd.DataFrame(
            stats.values(), columns=['activation_interval'], index=stats.keys()))


    def plotRuntimes(self, task, tag=None, threshold_ms=8, bins=64):
        """
        Plots "running times" for the specified task

        A "running time" is the sum of all the time intervals a task executed
        in between a wakeup and the next sleep (or exit).
        A set of plots is generated to report:
        - Running times at block time: every time a task blocks, a point is
          plotted to represent the cumulative time the task has been running
          since its last wakeup
        - Running times cumulative function: reports the cumulative function
          of the running times.
        - Running times histogram: reports a histogram of the running times.

        All plots are parameterized based on the value of threshold_ms, which
        can be used to filter out running times bigger than 2 times this
        value.
        Such a threshold is useful to filter outliers out of the plots, thus
        focusing the analysis on the most interesting periodicity.
        The number and percentage of discarded samples is reported in output.
        A default threshold of 8 [ms] is used, which is useful for example to
        analyze a 60Hz rendering pipeline.

        A PNG of the generated plots is saved in the same folder where the
        trace is.

        :param task: the task to report running times for
        :type task: int or list(str)

        :param tag: a string to add to the plot title
        :type tag: str

        :param threshold_ms: the minimum acceptable [ms] value to report
                             graphically in the generated plots
        :type threshold_ms: int or float

        :param bins: number of bins to be used for the running times histogram
        :type bins: int

        :returns: a DataFrame with statistics on plotted running times
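
        Example::

            # Hypothetical usage, assuming `trace` is a Trace object and
            # "mytask" is the name of one of the traced tasks:
            stats = trace.analysis.latency.plotRuntimes("mytask",
                                                        threshold_ms=8)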
        """

        if not self._trace.hasEvents('sched_switch'):
            self._log.warning('Event [sched_switch] not found, '
                              'plot DISABLED!')
            return
        if not self._trace.hasEvents('sched_wakeup'):
            self._log.warning('Event [sched_wakeup] not found, '
                              'plot DISABLED!')
            return

        # Get task data
        td = self._getTaskData(task)
        if not td:
            return None

        # Load runtime data
        run_df = self._dfg_runtimes_df(td.pid)
        if run_df is None:
            return None
        self._log.info('Found: %5d activations for [%s]',
                       len(run_df), td.label)

        # Disregard data above two times the specified threshold
        y_max = (2 * threshold_ms) / 1000.
        len_tot = len(run_df)
        run_df = run_df[run_df.running_time <= y_max]
        len_plt = len(run_df)
        if len_plt < len_tot:
            len_dif = len_tot - len_plt
            len_pct = 100. * len_dif / len_tot
            self._log.warning('Discarding {} running times (above 2 x '
                              'threshold_ms, {:.1f}% of the overall activations)'\
                              .format(len_dif, len_pct))
        ymax = 1.1 * run_df.running_time.max()

        # Build the series for the CDF
        cdf = self._getCDF(run_df.running_time, (threshold_ms / 1000.))
        self._log.info('%.1f %% samples below %d [ms] threshold',
                       100. * cdf.below, threshold_ms)

        # Setup plots
        gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1], width_ratios=[1, 1])
        plt.figure(figsize=(16, 8))

        plot_title = "[{}]: running times (@ block time)".format(td.label)
        if tag:
            plot_title = "{} [{}]".format(plot_title, tag)
        plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)

        # Running times over time
        axes = plt.subplot(gs[0, 0:2])
        axes.set_title(plot_title)
        run_df.plot(style='g+', logy=False, ax=axes)

        axes.axhline(threshold_ms / 1000., linestyle='--', color='g')
        self._trace.analysis.status.plotOverutilized(axes)
        axes.legend(loc='lower center', ncol=2)
        axes.set_xlim(self._trace.x_min, self._trace.x_max)

        # Cumulative distribution of all running times
        axes = plt.subplot(gs[1, 0])
        cdf.df.plot(ax=axes, legend=False, xlim=(0, None),
                    title='Runtime CDF ({:.1f}% within {} [ms] threshold)'\
                          .format(100. * cdf.below, threshold_ms))
        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5)
        axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')

        # Histogram of all running times
        axes = plt.subplot(gs[1, 1])
        run_df.plot(kind='hist', bins=bins, ax=axes,
                    xlim=(0, ymax), legend=False,
                    title='Runtime histogram ({} bins, {} [ms] green threshold)'\
                          .format(bins, threshold_ms))
        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5)

        # Save generated plots into datadir
        task_name = re.sub('[\ :/]', '_', td.label)
        figname = '{}/{}task_runtimes_{}_{}.png'\
                  .format(self._trace.plots_dir, self._trace.plots_prefix,
                          td.pid, task_name)
        pl.savefig(figname, bbox_inches='tight')

        # Return statistics
        stats_df = run_df.describe(percentiles=[0.95, 0.99])
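        # Add one extra row to the summary: its label is the percentage of
        # samples below the threshold and its value is the threshold itself
        # (in seconds)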
        label = '{:.1f}%'.format(100. * cdf.below)
        stats = {label: cdf.threshold}
        return stats_df.append(pd.DataFrame(
            stats.values(), columns=['running_time'], index=stats.keys()))


###############################################################################
# Utility Methods
###############################################################################

    @memoized
    def _getTaskData(self, task):

        # Get task PID
        if isinstance(task, str):
            task_pids = self._trace.getTaskByName(task)
            if len(task_pids) == 0:
                self._log.warning('No tasks found with name [%s]', task)
                return None

            task_pid = task_pids[0]
            if len(task_pids) > 1:
                self._log.warning('Multiple PIDs for task named [%s]', task)
                for pid in task_pids:
                    self._log.warning(' %5d : %s', pid,
                                      ','.join(self._trace.getTaskByPid(pid)))
                self._log.warning('Returning stats only for PID: %d',
                                  task_pid)
            task_names = self._trace.getTaskByPid(task_pid)

        # Get task name
        elif isinstance(task, int):
            task_pid = task
            task_names = self._trace.getTaskByPid(task_pid)
            if len(task_names) == 0:
                self._log.warning('No tasks found with PID [%d]', task)
                return None

        else:
            raise ValueError("Task must be either an int or str")

        task_label = "{}: {}".format(task_pid, ', '.join(task_names))
        return TaskData(task_pid, task_names, task_label)

    @memoized
    def _taskState(self, state):
        try:
            state = int(state)
        except ValueError:
            # State already converted to symbol
            return state

        # Tasks STATE flags (Linux 3.18)
        TASK_STATES = {
              0: "R", # TASK_RUNNING
              1: "S", # TASK_INTERRUPTIBLE
              2: "D", # TASK_UNINTERRUPTIBLE
              4: "T", # __TASK_STOPPED
              8: "t", # __TASK_TRACED
             16: "X", # EXIT_DEAD
             32: "Z", # EXIT_ZOMBIE
             64: "x", # TASK_DEAD
            128: "K", # TASK_WAKEKILL
            256: "W", # TASK_WAKING
            512: "P", # TASK_PARKED
           1024: "N", # TASK_NOLOAD
        }

        kver = self._trace.platform['kernel']['parts']
        if kver is None:
            kver = (3, 18)
            self._log.info('Parsing sched_switch states assuming kernel v%d.%d',
                           kver[0], kver[1])

        if kver >= (4, 8):
            TASK_STATES[2048] = "n" # TASK_NEW
        TASK_MAX_STATE = 2 * max(TASK_STATES)

        res = "R"
        if state & (TASK_MAX_STATE - 1) != 0:
            res = ""
        for key in TASK_STATES.keys():
            if key & state:
                res += TASK_STATES[key]
        if state & TASK_MAX_STATE:
            res += "+"
        else:
            res = '|'.join(res)
        return res


    def _getCDF(self, data, threshold):
        """
        Build the "Cumulative Distribution Function" (CDF) for the given data
        """

        # Build the series of sorted values
        ser = data.sort_values()
        if len(ser) < 1000:
            # Append the last (and largest) value once more.
            # This step is important, especially for small sample sizes,
            # in order to get an unbiased CDF
            ser = ser.append(pd.Series(ser.iloc[-1]))
        df = pd.Series(np.linspace(0., 1., len(ser)), index=ser)

        # Compute percentage of samples above/below the specified threshold
        below = float(max(df[:threshold]))
        above = 1 - below
        return CDF(df, threshold, above, below)


# vim :set tabstop=4 shiftwidth=4 expandtab