# Copyright 2015-2017 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Base class to parse trace.dat dumps"""

import re
import warnings
from collections import defaultdict
from resource import getrusage, RUSAGE_SELF

import pandas as pd

def _get_free_memory_kb():
    try:
        with open("/proc/meminfo") as f:
            memfree_line = [l for l in f.readlines() if "MemFree" in l][0]
            _, num_kb, _ = memfree_line.split()
            return int(num_kb)
    except (IOError, IndexError, ValueError):
        # Probably either not running on Linux (no /proc/meminfo), or the
        # format has changed (we didn't find num_kb).
        return None

def trace_parser_explode_array(string, array_lengths):
    """Explode an array in the trace into individual elements for easy parsing

    Basically, turn :code:`load={1 1 2 2}` into :code:`load0=1 load1=1 load2=2
    load3=2`.

    :param string: Input string from the trace
    :type string: str

    :param array_lengths: A dictionary of array names and their
        expected lengths. If we get an array that's shorter than the
        expected length, additional keys are introduced with value 0
        to compensate.
    :type array_lengths: dict

    For example:
    ::

        trace_parser_explode_array(string="load={1 2}",
                                   array_lengths={"load": 4})
        "load0=1 load1=2 load2=0 load3=0"
    """

    while True:
        match = re.search(r"[^ ]+={[^}]+}", string)
        if match is None:
            break

        to_explode = match.group()
        col_basename = re.match(r"([^=]+)=", to_explode).groups()[0]
        vals_str = re.search(r"{(.+)}", to_explode).groups()[0]
        vals_array = vals_str.split(' ')

        exploded_str = ""
        for (idx, val) in enumerate(vals_array):
            exploded_str += "{}{}={} ".format(col_basename, idx, val)

        # Pad short arrays with zeros up to the expected length
        vals_added = len(vals_array)
        if vals_added < array_lengths[col_basename]:
            for idx in range(vals_added, array_lengths[col_basename]):
                exploded_str += "{}{}=0 ".format(col_basename, idx)

        # Drop the trailing space
        exploded_str = exploded_str[:-1]
        begin_idx = match.start()
        end_idx = match.end()

        string = string[:begin_idx] + exploded_str + string[end_idx:]

    return string
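
# An illustrative call (kept as a comment so importing this module stays
# side-effect free): two arrays in one line, with "load" padded out to its
# expected length of 4:
#
#   trace_parser_explode_array("cpus=2 load={1 2} freq={5}",
#                              {"load": 4, "freq": 1})
#   # -> 'cpus=2 load0=1 load1=2 load2=0 load3=0 freq0=5'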

class Base(object):
    """Base class to parse trace.dat dumps.

    Don't use this directly; create a subclass that has a unique_word
    class variable. unique_word is a string that can uniquely identify
    lines in the trace that correspond to this event. This is usually
    the trace name (optionally followed by a colon,
    e.g. "sched_switch:") but it can be anything else for trace points
    generated using trace_printk().

    :param parse_raw: If :code:`True`, raw trace data (the -r option to
        trace-cmd) will be used

    :param fallback: If :code:`True`, the parsing class will be used
        only if no other candidate class's unique_word matched.
        Subclasses should override this (e.g. TracingMarkWrite uses it)

    This class acts as a base class for all TRAPpy events

    """
    def __init__(self, parse_raw=False, fallback=False):
        self.fallback = fallback
        self.tracer = None
        self.data_frame = pd.DataFrame()
        self.line_array = []
        self.data_array = []
        self.time_array = []
        self.comm_array = []
        self.pid_array = []
        self.tgid_array = []
        self.cpu_array = []
        self.parse_raw = parse_raw
        self.cached = False

    def finalize_object(self):
        pass

    def __get_trace_array_lengths(self):
        """Calculate the lengths of all arrays in the trace

        Returns a dict with the name of each array found in the trace
        as keys and their corresponding lengths as values

        """
        pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}")

        ret = defaultdict(int)

        for line in self.data_array:
            while True:
                match = re.search(pat_array, line)
                if not match:
                    break

                (array_name, array_elements) = match.groups()

                array_len = len(array_elements.split(' '))

                if array_len > ret[array_name]:
                    ret[array_name] = array_len

                line = line[match.end():]

            # Stop scanning if the trace doesn't have arrays
            if len(ret) == 0:
                break

        return ret

    def append_data(self, time, comm, pid, tgid, cpu, line, data):
        """Append data parsed from a line to the corresponding arrays

        The :mod:`DataFrame` will be created from this when the whole trace
        has been parsed.

        :param time: The timestamp of the line that was printed in the trace
        :type time: float

        :param comm: The command name or execname from which the trace
            line originated
        :type comm: str

        :param pid: The PID of the process from which the trace
            line originated
        :type pid: int

        :param tgid: The thread group ID of the process from which the
            trace line originated
        :type tgid: int

        :param cpu: The CPU on which the trace line was emitted
        :type cpu: int

        :param line: The line number of the entry within the trace
        :type line: int

        :param data: The data of the matching line in the trace
        :type data: str
        """

        self.time_array.append(time)
        self.comm_array.append(comm)
        self.pid_array.append(pid)
        self.tgid_array.append(tgid)
        self.cpu_array.append(cpu)
        self.line_array.append(line)
        self.data_array.append(data)

    def string_cast(self, string, type):
        """Attempt to convert a string to another type

        Here we attempt to cast string to a type. Currently only
        integer conversion is supported, with future expansion
        left open to other types.

        :param string: The value to convert.
        :type string: str

        :param type: The type to convert to.
        :type type: type
        """
        # Currently this function only supports int conversion; return the
        # value unchanged for any other requested type
        if type != int:
            return string
        # Handle false positives for negative numbers
        if not string.lstrip("-").isdigit():
            return string
        return int(string)
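
    # Illustrative behaviour of string_cast (a comment rather than live code,
    # since this is a library module); values that don't look like integers
    # are returned unchanged:
    #
    #   self.string_cast("42", int)    # -> 42
    #   self.string_cast("-42", int)   # -> -42
    #   self.string_cast("4.2", int)   # -> "4.2" (isdigit() is False)
    #   self.string_cast("idle", int)  # -> "idle"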

    def generate_data_dict(self, data_str):
        data_dict = {}
        prev_key = None
        for field in data_str.split():
            if "=" not in field:
                # Concatenation is supported only for "string" values
                if prev_key is None or type(data_dict[prev_key]) is not str:
                    continue
                data_dict[prev_key] += ' ' + field
                continue
            (key, value) = field.split('=', 1)
            value = self.string_cast(value, int)
            data_dict[key] = value
            prev_key = key
        return data_dict

    def generate_parsed_data(self):

        # Get a rough idea of how much memory we have to play with
        CHECK_MEM_COUNT = 10000
        kb_free = _get_free_memory_kb()
        starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss
        check_memory_usage = True
        check_memory_count = 1

        for (comm, pid, tgid, cpu, line, data_str) in zip(
                self.comm_array, self.pid_array, self.tgid_array,
                self.cpu_array, self.line_array, self.data_array):
            data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid,
                         "__cpu": cpu, "__line": line}
            data_dict.update(self.generate_data_dict(data_str))

            # When running out of memory, Pandas has been observed to segfault
            # rather than throwing a proper Python error. Look at how much
            # memory our process is using and warn if we seem to be getting
            # close to the system's limit. Check only once at the beginning
            # and then once every CHECK_MEM_COUNT events.
            check_memory_count -= 1
            if check_memory_usage and check_memory_count == 0:
                kb_used = getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss
                if kb_free and kb_used > kb_free * 0.9:
                    warnings.warn("TRAPpy: Appear to be low on memory. "
                                  "If errors arise, try providing more RAM")
                    # Warn at most once
                    check_memory_usage = False
                check_memory_count = CHECK_MEM_COUNT

            yield data_dict

    def create_dataframe(self):
        """Create the final :mod:`pandas.DataFrame`"""
        if not self.time_array:
            return

        trace_arr_lengths = self.__get_trace_array_lengths()

        if trace_arr_lengths:
            for (idx, val) in enumerate(self.data_array):
                expl_val = trace_parser_explode_array(val, trace_arr_lengths)
                self.data_array[idx] = expl_val

        time_idx = pd.Index(self.time_array, name="Time")
        self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx)

        self.time_array = []
        self.line_array = []
        self.comm_array = []
        self.pid_array = []
        self.tgid_array = []
        self.cpu_array = []
        self.data_array = []

    def write_csv(self, fname):
        """Write the DataFrame to a CSV file

        :param fname: The name of the CSV file
        :type fname: str
        """
        self.data_frame.to_csv(fname)

    def read_csv(self, fname):
        """Read CSV data into the DataFrame

        :param fname: The name of the CSV file
        :type fname: str
        """
        self.data_frame = pd.read_csv(fname, index_col=0)

    def normalize_time(self, basetime):
        """Subtract basetime from the Time index of the data frame

        :param basetime: The offset which needs to be subtracted from
            the time index
        :type basetime: float
        """
        if basetime and not self.data_frame.empty:
            self.data_frame.reset_index(inplace=True)
            self.data_frame["Time"] = self.data_frame["Time"] - basetime
            self.data_frame.set_index("Time", inplace=True)
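
# A minimal, self-contained sketch of how a subclass is expected to drive this
# class: define a unique_word, feed lines in via append_data(), then build the
# frame with create_dataframe(). The event name and field names below are
# hypothetical, chosen only for illustration.
if __name__ == "__main__":

    class _ExampleEvent(Base):
        """Hypothetical event type; unique_word identifies its trace lines."""
        unique_word = "example_event:"

    event = _ExampleEvent()
    event.append_data(time=0.000001, comm="app", pid=1234, tgid=1234, cpu=0,
                      line=0, data="state=running load={1 2}")
    event.append_data(time=0.000002, comm="app", pid=1234, tgid=1234, cpu=1,
                      line=1, data="state=idle load={3 4 5}")
    event.create_dataframe()

    # The "load" array is exploded to load0..load2 (the shorter row is padded
    # with 0) and numeric strings are cast to int, so columns include "state",
    # "load0", "load1", "load2" plus the __comm/__pid/__tgid/__cpu/__line
    # bookkeeping columns.
    print(event.data_frame)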