#    Copyright 2015-2016 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Utility functions for sheye"""

import trappy
import numpy as np

# pylint fails to recognize numpy members.
# pylint: disable=no-member

# ``basestring`` only exists on Python 2; fall back to ``str`` so that
# init_ftrace does not die with a NameError on Python 3.
try:
    _STRING_TYPES = basestring  # pylint: disable=undefined-variable
except NameError:
    _STRING_TYPES = str


def listify(to_select):
    """Utility function to handle both single and
    list inputs

    :param to_select: A single element or a list of elements
    :return: ``to_select`` itself when it is already a :class:`list`,
        otherwise a new one-element list wrapping it
    """

    if isinstance(to_select, list):
        return to_select

    return [to_select]


def init_ftrace(trace):
    """Initialize the FTrace Object

    :param trace: Path for the trace file
        or a trace object
    :type trace: str, :mod:`trappy.ftrace.FTrace`

    :return: A new ``trappy.FTrace`` built from the path when a string
        is given, or ``trace`` unchanged when it already is an instance
        of ``trappy.BareTrace``
    :raises ValueError: If ``trace`` is neither a string nor a
        ``trappy.BareTrace`` instance
    """

    if isinstance(trace, _STRING_TYPES):
        return trappy.FTrace(trace)

    if isinstance(trace, trappy.BareTrace):
        return trace

    raise ValueError("Invalid trace Object")


def select_window(series, window):
    """Helper Function to select a portion of
    pandas time series

    :param series: Input Time Series data
    :type series: :mod:`pandas.Series`

    :param window: A tuple indicating a time window as
        ``(start, stop)``; both bounds are inclusive. A falsy
        value (e.g. ``None``) selects the whole series
    :type window: tuple

    :return: The elements of ``series`` whose index lies
        within the window
    """

    if not window:
        return series

    start, stop = window
    timestamps = series.index
    in_window = (timestamps >= start) & (timestamps <= stop)
    return series[in_window]


def area_under_curve(series, sign=None, method="trapz", step="post"):
    """Return the area under the time series curve (Integral)

    :param series: The time series to be integrated
    :type series: :mod:`pandas.Series`

    :param sign: Clip the data for the area in positive
        or negative regions. Can have two values

        - `"+"` (only the positive region is integrated)
        - `"="` (only the negative region is integrated; `"-"`
          is accepted as an alias)

        Any other value leaves the data unclipped.
    :type sign: str

    :param method: The method for area calculation. This can
        be any of the integration methods supported in `numpy`
        or `rect`
    :type method: str

    :param step: The step behaviour for `rect` method
    :type step: str

    :return: The computed area. NaN samples are dropped before
        the integration.
    :raises ValueError: If ``step`` is neither `"post"` nor `"pre"`
        (`rect` method only) or if ``method`` is not `rect` and is
        not the name of a `numpy` attribute

    *Rectangular Method*

        - Step: Post

            Consider the following time series data

            .. code::

                2            *----*----*----+
                             |              |
                1            |              *----*----*
                             |
                0  *----*----+
                   0    1    2    3    4    5    6    7

            .. code::

                import pandas as pd
                a = [0, 0, 2, 2, 2, 1, 1, 1]
                s = pd.Series(a)

            The area under the curve is:

            .. math::

                \\sum_{k=0}^{N-1} (x_{k+1} - {x_k}) \\times f(x_k) \\\\
                (2 \\times 3) + (1 \\times 2) = 8

        - Step: Pre

            .. code::

                2       +----*----*----*
                        |              |
                1       |              +----*----*----*
                        |
                0  *----*
                   0    1    2    3    4    5    6    7

            .. code::

                import pandas as pd
                a = [0, 0, 2, 2, 2, 1, 1, 1]
                s = pd.Series(a)

            The area under the curve is:

            .. math::

                \\sum_{k=1}^{N} (x_k - x_{k-1}) \\times f(x_k) \\\\
                (2 \\times 3) + (1 \\times 3) = 9
    """

    # Series.clip(lower=..., upper=...) replaces the clip_lower/clip_upper
    # helpers, which are no longer available in recent pandas releases.
    if sign == "+":
        series = series.clip(lower=0)
    elif sign in ("=", "-"):
        series = series.clip(upper=0)

    series = series.dropna()

    if method == "rect":

        # Each rectangle spans two consecutive index values; its height
        # is the sample at the left edge (post) or the right edge (pre).
        if step == "post":
            values = series.values[:-1]
        elif step == "pre":
            values = series.values[1:]
        else:
            raise ValueError("Invalid Value for step: {}".format(step))

        return float((values * np.diff(series.index)).sum())

    np_integ_method = getattr(np, method, None)
    if np_integ_method is None and method == "trapz":
        # Newer NumPy releases expose the trapezoidal rule as
        # ``trapezoid``; keep the historical default name working.
        np_integ_method = getattr(np, "trapezoid", None)

    if np_integ_method is None:
        raise ValueError("Invalid method: {}".format(method))

    return np_integ_method(series.values, series.index)


def interval_sum(series, value=None, step="post"):
    """A function that returns the sum of the
    intervals where the value of series is equal to
    the expected value. Consider the following time
    series data:

    ====== =======
     Time   Value
    ====== =======
      0      0
      1      0
      2      1
      3      1
      4      1
      5      1
      8      0
      9      1
     10      0
     11      1
     12      1
    ====== =======

    .. note::

        The time/index values, in general, may not be
        uniform. This causes a difference in the
        values of :func:`interval_sum` for **step-pre**
        and **step-post** behaviours

    .. code::

        import pandas

        values = [0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1]
        index = [0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12]
        series = pandas.Series(values, index=index)

    The :func:`interval_sum` for the value 1 is calculated differently
    for **step-post** and **step-pre** behaviours as follows:

        - **Step-Post**


          .. code::

            1            *----*----*----*--------------+    *----+    *----*
                         |                             |    |    |    |
            0  *----*----+                             *----+    *----+
               0    1    2    3    4    5    6    7    8    9    10   11   12

          .. math::

            (8-2) + (10-9) + (12-11) = 6 + 1 + 1 = 8

        - **Step-Pre**

          .. code::

            1       +----*----*----*----*              +----*    +----*----*
                    |                   |              |    |    |
            0  *----*                   +--------------*    +----*
               0    1    2    3    4    5    6    7    8    9    10   11   12

          .. math::

                (5-1) + (9-8) + (12-10) = 4 + 1 + 2 = 7

        .. note::

            The asterisks (*) on the plots above represent the values of the
            time series data and these do not vary between the two step styles

    :param series: The time series data
    :type series: :mod:`pandas.Series`

    :param value: The value to be checked for in the series. If the
        value is None, the truth value of the elements in the
        series will be used
    :type value: element

    :param step: The step behaviour as described above
        ::

            step="post"
            step="pre"
    :type step: str

    :return: The accumulated length of the matching intervals
        (0.0 for an empty series)
    :rtype: float
    :raises ValueError: If ``step`` is neither `"post"` nor `"pre"`
    """

    if step == "post":
        step_post = True
    elif step == "pre":
        step_post = False
    else:
        raise ValueError("Invalid value for step: {}".format(step))

    index = series.index
    array = series.values
    last = len(array) - 1

    # Nothing to accumulate; also avoids indexing position -1 below.
    if last < 0:
        return 0.0

    # Position of the final sample of every run of identical values.
    # Comparing neighbours (rather than subtracting them) also works for
    # values that do not support subtraction.
    run_ends = np.flatnonzero(array[1:] != array[:-1])
    time_splits = np.append(run_ends, last)

    prev = 0
    total = 0

    for split in time_splits:

        # All samples in [prev, split] are equal, so testing one suffices.
        sample = series.iloc[split]

        # Only ``None`` selects truthiness mode; a falsy ``value`` such
        # as 0 must still be compared for equality.
        if value is not None:
            matched = (sample == value)
        else:
            matched = sample

        if matched:
            start = prev
            end = split

            if step_post:
                # The last sample of the run holds until the next sample.
                end = min(split + 1, last)
            else:
                # The first sample of the run applies back to the
                # previous sample (when there is one).
                start = max(prev - 1, 0)

            total += index[end] - index[start]

        prev = split + 1

    return float(total)