# Copyright 2015-2017 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Grammar module allows the user to easily define relations
between data events and perform basic logical and arithmetic
operations on the data. The parser also handles super-indexing
and variable forwarding.
"""
from pyparsing import Literal, delimitedList, Optional, oneOf, nums,\
    alphas, alphanums, Forward, Word, opAssoc, operatorPrecedence, Combine, Group
import importlib
import pandas as pd
import types
import numpy as np
from trappy.stats.Topology import Topology
from trappy.stats import StatConf
from trappy.utils import handle_duplicate_index, listify


def parse_num(tokens):
    """Parser function for numerical data

    :param tokens: The grammar tokens
    :type tokens: list
    """
    return float(tokens[0])

# Suppressed Literals
LPAREN = Literal("(").suppress()
RPAREN = Literal(")").suppress()
COLON = Literal(":").suppress()
EXP_START = Literal("[").suppress()
EXP_END = Literal("]").suppress()

# Grammar Tokens

# DataFrame Accessor
INTEGER = Combine(Optional(oneOf("+ -")) + Word(nums))\
    .setParseAction(parse_num)
REAL = Combine(Optional(oneOf("+ -")) + Word(nums) + "." +
               Optional(Word(nums)) +
               Optional(oneOf("e E") + Optional(oneOf("+ -")) + Word(nums)))\
    .setParseAction(parse_num)

# Generic Identifier
IDENTIFIER = Word(alphas + '_', alphanums + '_')
# Python Like Function Name
FUNC_NAME = delimitedList(IDENTIFIER, delim=".", combine=True)
# Exponentiation operators
EXPONENTIATION_OPS = "**"
# Unary Operators
UNARY_OPS = oneOf("+ -")
# Multiplication/Division Operators
MULT_OPS = oneOf("* / // %")
# Addition/Subtraction Operators
SUM_OPS = oneOf("+ -")
# Relational Operators
REL_OPS = oneOf("> < >= <= == !=")
# Logical Operators
LOGICAL_OPS = oneOf("&& || & |")

# Operator to function mapping
OPERATOR_MAP = {
    "+": lambda a, b: a + b,
    "-": lambda a, b: a - b,
    "*": lambda a, b: a * b,
    "/": lambda a, b: a / b,
    "//": lambda a, b: a // b,
    "%": lambda a, b: a % b,
    "**": lambda a, b: a ** b,
    ">": lambda a, b: a > b,
    "<": lambda a, b: a < b,
    ">=": lambda a, b: a >= b,
    "<=": lambda a, b: a <= b,
    "||": lambda a, b: a or b,
    "&&": lambda a, b: a and b,
    "|": lambda a, b: a | b,
    "==": lambda a, b: a == b,
    "!=": lambda a, b: a != b,
    "&": lambda a, b: a & b
}


def eval_unary_op(tokens):
    """Unary Op Evaluation

    :param tokens: The grammar tokens
    :type tokens: list
    """

    params = tokens[0]
    if params[0] == "-":
        return -1 * params[1]
    else:
        return params[1]


def iterate_binary_ops(tokens):
    """An iterator for Binary Operation tokens

    Yields ``(operator, operand)`` pairs from the flat token
    list produced by the parser.

    :param tokens: The grammar tokens
    :type tokens: list
    """

    itr = iter(tokens)
    while True:
        try:
            # Use the builtin next() rather than the Python 2-only
            # itr.next() method so this works on both Python 2 and 3
            yield (next(itr), next(itr))
        except StopIteration:
            break


def eval_binary_op(tokens):
    """Evaluate Binary operators

    The parser hands us a flat list ``[operand, op, operand, op, ...]``
    which is folded left-to-right through :data:`OPERATOR_MAP`.

    :param tokens: The grammar tokens
    :type tokens: list
    """

    params = tokens[0]
    result = params[0]

    for opr, val in iterate_binary_ops(params[1:]):
        result = OPERATOR_MAP[opr](result, val)

    return result


def str_to_attr(cls_str):
    """Bring the attr specified into current scope
    and return a handler

    :param cls_str: A string representing the class
    :type cls_str: str

    :return: A class object
    """
    attr_name = cls_str.rsplit(".", 1)
    if len(attr_name) == 2:
        module_name, attr_name = attr_name
        mod = importlib.import_module(module_name)
        return getattr(mod, attr_name)
    else:
        # No module prefix: look the name up in this module's globals
        attr_name = attr_name[0]
        return globals()[attr_name]


def get_parse_expression(parse_func, parse_var_id):
    """return a parse expression with for the
    input parseActions
    """

    var_id = Group(
        FUNC_NAME + COLON + IDENTIFIER) | REAL | INTEGER | IDENTIFIER
    var_id.setParseAction(parse_var_id)

    # Forward declaration for an Arithmetic Expression
    arith_expr = Forward()
    func_call = Group(
        FUNC_NAME +
        LPAREN +
        Optional(
            Group(
                delimitedList(arith_expr))) +
        RPAREN)
    # An Arithmetic expression can have a var_id or
    # a function call as an operand
    # pylint: disable=expression-not-assigned
    arith_expr << operatorPrecedence(func_call | var_id,
                                     [
                                         (EXPONENTIATION_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (UNARY_OPS, 1,
                                          opAssoc.RIGHT, eval_unary_op),
                                         (MULT_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (SUM_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (REL_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (LOGICAL_OPS, 2,
                                          opAssoc.LEFT, eval_binary_op)
                                     ])

    # pylint: enable=expression-not-assigned
    # Argument expression for a function call
    # An argument to a function can be an
    # IDENTIFIER, Arithmetic expression, REAL number, INTEGER or a
    # Function call itself
    func_call.setParseAction(parse_func)
    return arith_expr


class Parser(object):

    """A parser class for solving simple
    data accesses and super-indexing data

    :param data: Trace Object
    :type data: instance of :mod:`trappy.ftrace.BareTrace` or a child
        class (like :mod:`trappy.ftrace.FTrace`)

    :param pvars: A dictionary of variables that need to be
        accessed from within the grammar
    :type pvars: dict

    :param method: The method to be used for reindexing data
        This can be one of the standard :mod:`pandas.DataFrame`
        methods (eg. pad, bfill, nearest). The default is pad
        or use the last valid observation.
    :type method: str

    :param limit: The number of indices a value will be propagated
        when reindexing. The default is None
    :type limit: int

    :param fill: Whether to fill the NaNs in the data.
        The default value is True.
    :type fill: bool

    :param window: A window of time in which to apply the data
        accesses. By default the data accesses happen across the
        whole trace. With the window parameter you can limit it to a
        window of time inside the trace. The first element of the
        tuple is the starting time and the second the ending time (set
        to None for end of trace).

    :type window: tuple

    :param filters: Restrict the parsing to the rows that match the
        specified criteria. For Example:
        ::

            filters =
                {
                    "pid": 3338,
                    "cpu": [0, 2, 4],
                }

        will only consider rows whose pid column is 3338 and cpu is
        either 0, 2 or 4.
    :type filters: dict

    - **Operators**

        +----------------+----------------------+---------------+
        |   Operation    |      operator        | Associativity |
        +================+======================+===============+
        | Exponentiation |        \*\*          |     Left      |
        +----------------+----------------------+---------------+
        |Unary           |         \-           |     Right     |
        +----------------+----------------------+---------------+
        | Multiply/Divide|    \*, /, //, %      |     Left      |
        +----------------+----------------------+---------------+
        | Add/Subtract   |      +, \-,          |     Left      |
        +----------------+----------------------+---------------+
        | Comparison     | >, <, >=, <=, ==, != |     Left      |
        +----------------+----------------------+---------------+
        | Logical        |    &&, ||, \|, &     |     Left      |
        +----------------+----------------------+---------------+

    - **Data Accessors**

        Since the goal of the grammar is to provide an
        easy language to access and compare data
        from a :mod:`trappy.trace.FTrace` object. The parser provides
        a simple notation to access this data.

        *Statically Defined Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("trappy.thermal.Thermal:temp * 2")

        *Aliasing*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            pvars = {"THERMAL": trappy.thermal.Thermal}
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace, pvars=pvars)
            parser.solve("THERMAL:temp * 2")

        *Using Event Name*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("thermal:temp * 2")

        The event :mod:`trappy.thermal.Thermal` is aliased
        as **thermal** in the grammar

        *Dynamic Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            # Register Dynamic Event
            cls = trappy.register_dynamic_ftrace("my_unique_word", "event_name")

            pvars = {"CUSTOM": cls}
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace, pvars=pvars)
            parser.solve("CUSTOM:col * 2")

        .. seealso:: :mod:`trappy.dynamic.register_dynamic_ftrace`

    """

    def __init__(self, data, pvars=None, window=(0, None), filters=None, **kwargs):
        if pvars is None:
            pvars = {}

        self.data = data
        self._pvars = pvars
        self._accessor = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._pre_process)
        self._inspect = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._parse_for_info)
        self._parse_expr = get_parse_expression(
            self._parse_func, self._parse_var_id)
        self._agg_df = pd.DataFrame()
        self._pivot_set = set()
        self._limit = kwargs.get("limit", StatConf.REINDEX_LIMIT_DEFAULT)
        self._method = kwargs.get("method", StatConf.REINDEX_METHOD_DEFAULT)
        self._fill = kwargs.get("fill", StatConf.NAN_FILL_DEFAULT)
        self._window = window
        self._filters = filters

    def solve(self, expr):
        """Parses and solves the input expression

        :param expr: The input expression
        :type expr: str

        :return: The return type may vary depending on
            the expression. For example:

            **Vector**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp * 2")

            **Scalar**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("numpy.mean(trappy.thermal.Thermal:temp)")

            **Vector Mask**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp > 65000")
        """

        # Pre-process accessors for indexing
        self._accessor.searchString(expr)
        return self._parse_expr.parseString(expr)[0]

    def _pivot(self, cls, column):
        """Pivot Data for concatenation"""

        data_frame = self._get_data_frame(cls)
        if data_frame.empty:
            raise ValueError("No events found for {}".format(cls.name))

        data_frame = handle_duplicate_index(data_frame)
        new_index = self._agg_df.index.union(data_frame.index)

        if hasattr(cls, "pivot") and cls.pivot:
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}

            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index,
                        method=self._method,
                        limit=self._limit)

            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)

        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index,
                method=self._method,
                limit=self._limit)

        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT: data_frame[
            [column]]}, axis=1).swaplevel(0, 1, axis=1)

    def _pre_process(self, tokens):
        """Pre-process accessors for super-indexing"""

        params = tokens[0]
        # Already aggregated: reuse the super-indexed column
        if params[1] in self._agg_df.columns:
            return self._agg_df[params[1]]

        event = params[0]
        column = params[1]

        if event in self._pvars:
            cls = self._pvars[event]
        elif event in self.data.class_definitions:
            cls = self.data.class_definitions[event]
        else:
            try:
                cls = str_to_attr(event)
            except KeyError:
                raise ValueError(
                    "Can't find parser class for event {}".format(event))

        data_frame = self._pivot(cls, column)
        self._agg_df = pd.concat(
            [self._agg_df, data_frame], axis=1)

        if self._fill:
            self._agg_df = self._agg_df.fillna(method="pad")

        return self._agg_df[params[1]]

    def _parse_for_info(self, tokens):
        """Parse Action for inspecting data accessors"""

        params = tokens[0]
        cls = params[0]
        column = params[1]
        info = {}
        info["pivot"] = None
        info["pivot_values"] = None

        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)

        data_frame = self._get_data_frame(cls)

        info["class"] = cls
        info["length"] = len(data_frame)
        if hasattr(cls, "pivot") and cls.pivot:
            info["pivot"] = cls.pivot
            info["pivot_values"] = list(np.unique(data_frame[cls.pivot]))
        info["column"] = column
        info["column_present"] = column in data_frame.columns
        return info

    def _parse_var_id(self, tokens):
        """A function to parse a variable identifier
        """

        params = tokens[0]
        try:
            return float(params)
        except (ValueError, TypeError):
            try:
                return self._pvars[params]
            except KeyError:
                return self._agg_df[params[1]]

    def _parse_func(self, tokens):
        """A function to parse a function string"""

        params = tokens[0]
        func_name = params[0]
        if func_name in self._pvars and isinstance(
                self._pvars[func_name],
                types.FunctionType):
            func = self._pvars[func_name]
        else:
            func = str_to_attr(params[0])
        return func(*params[1])

    def _get_data_frame(self, cls):
        """Get the data frame from the BareTrace object, applying the window
        and the filters"""

        data_frame = getattr(self.data, cls.name).data_frame

        if data_frame.empty:
            return data_frame
        elif self._window[1] is None:
            data_frame = data_frame.loc[self._window[0]:]
        else:
            data_frame = data_frame.loc[self._window[0]:self._window[1]]

        if self._filters:
            criterion = pd.Series([True] * len(data_frame),
                                  index=data_frame.index)

            # Use items() (not the Python 2-only iteritems()) so this
            # works on both Python 2 and Python 3
            for filter_col, wanted_vals in self._filters.items():
                try:
                    dfr_col = data_frame[filter_col]
                except KeyError:
                    # Silently ignore filters on columns this event
                    # doesn't have
                    continue

                criterion &= dfr_col.isin(listify(wanted_vals))

            data_frame = data_frame[criterion]

        return data_frame

    def ref(self, mask):
        """Reference super indexed data with a boolean mask

        :param mask: A boolean :mod:`pandas.Series` that
            can be used to reference the aggregated data in
            the parser
        :type mask: :mod:`pandas.Series`

        :return: aggregated_data[mask]
        """

        return self._agg_df[mask]

    def inspect(self, accessor):
        """A function to inspect the accessor for information

        :param accessor: A data accessor of the format
            <event>:<column>
        :type accessor: str

        :return: A dictionary of information
        """
        return self._inspect.parseString(accessor)[0]