# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import json
import logging
import os
import shutil
import subprocess
import tempfile


_TRACING_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            os.path.pardir, os.path.pardir)
_TRACE2HTML_PATH = os.path.join(_TRACING_DIR, 'bin', 'trace2html')

# Types accepted as "a trace in string form". `basestring` only exists on
# Python 2; fall back to `str` so the module also works on Python 3.
try:
  _STRING_TYPES = (basestring,)  # pylint: disable=undefined-variable
except NameError:
  _STRING_TYPES = (str,)


class NonSerializableTraceData(Exception):
  """Raised when raw trace data cannot be serialized to TraceData."""
  pass


class TraceDataPart(object):
  """TraceData can have a variety of events.

  These are called "parts" and are accessed by the following fixed field names.
  Two parts are equal (and hash equally) when their raw field names are equal.
  """
  def __init__(self, raw_field_name):
    self._raw_field_name = raw_field_name

  def __repr__(self):
    return 'TraceDataPart("%s")' % self._raw_field_name

  @property
  def raw_field_name(self):
    return self._raw_field_name

  def __eq__(self, other):
    # Let Python try the reflected comparison instead of raising
    # AttributeError when |other| is not a TraceDataPart.
    if not isinstance(other, TraceDataPart):
      return NotImplemented
    return self.raw_field_name == other.raw_field_name

  def __ne__(self, other):
    # Python 2 does not derive != from ==, so define it explicitly to keep
    # the two operators consistent.
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    return hash(self.raw_field_name)


ANDROID_PROCESS_DATA_PART = TraceDataPart('androidProcessDump')
ATRACE_PART = TraceDataPart('systemTraceEvents')
BATTOR_TRACE_PART = TraceDataPart('powerTraceAsString')
CHROME_TRACE_PART = TraceDataPart('traceEvents')
CPU_TRACE_DATA = TraceDataPart('cpuSnapshots')
INSPECTOR_TRACE_PART = TraceDataPart('inspectorTimelineEvents')
SURFACE_FLINGER_PART = TraceDataPart('surfaceFlinger')
TAB_ID_PART = TraceDataPart('tabIds')
TELEMETRY_PART = TraceDataPart('telemetry')
WALT_TRACE_PART = TraceDataPart('waltTraceEvents')

# NOTE(review): WALT_TRACE_PART is defined above but is not a member of this
# set, so it never shows up in TraceData.active_parts and is not split out by
# CreateTraceDataFromRawData. Confirm whether that omission is intentional.
ALL_TRACE_PARTS = {ANDROID_PROCESS_DATA_PART,
                   ATRACE_PART,
                   BATTOR_TRACE_PART,
                   CHROME_TRACE_PART,
                   CPU_TRACE_DATA,
                   INSPECTOR_TRACE_PART,
                   SURFACE_FLINGER_PART,
                   TAB_ID_PART,
                   TELEMETRY_PART}

ALL_TRACE_PARTS_RAW_NAMES = set(k.raw_field_name for k in ALL_TRACE_PARTS)


def _HasTraceFor(part, raw):
  """Returns True if |raw| holds at least one trace under |part|'s field name.

  Args:
    part: a TraceDataPart.
    raw: a dict mapping raw field names to lists of traces.
  """
  assert isinstance(part, TraceDataPart)
  if part.raw_field_name not in raw:
    return False
  return len(raw[part.raw_field_name]) > 0


def _GetFilePathForTrace(trace, dir_path):
  """ Return path to a file that contains |trace|.

  Note: if |trace| is an instance of TraceFileHandle, this reuses the trace path
  that the trace file handle holds. Otherwise, it creates a new trace file
  in |dir_path| directory.

  Raises:
    TypeError: if |trace| is not a TraceFileHandle, string, dict or list.
  """
  if isinstance(trace, TraceFileHandle):
    return trace.file_path
  is_text = isinstance(trace, _STRING_TYPES)
  # Validate the type before creating the file so that an unsupported trace
  # does not leave an empty (delete=False) temporary file behind.
  if not is_text and not isinstance(trace, (dict, list)):
    raise TypeError('Trace is of unknown type.')
  with tempfile.NamedTemporaryFile(mode='w', dir=dir_path, delete=False) as fp:
    if is_text:
      fp.write(trace)
    else:
      json.dump(trace, fp)
    return fp.name


class TraceData(object):
  """ TraceData holds a collection of traces from multiple sources.

  A TraceData can have multiple active parts. Each part represents traces
  collected from a different trace agent.
  """
  def __init__(self):
    """Creates an empty TraceData; TraceDataBuilder.AsData() populates it."""
    self._raw_data = {}
    self._events_are_safely_mutable = False

  def _SetFromBuilder(self, d):
    # |d| maps raw field names to lists of traces; ownership is handed over by
    # the builder, hence the events are marked as safely mutable.
    self._raw_data = d
    self._events_are_safely_mutable = True

  @property
  def events_are_safely_mutable(self):
    """Returns true if the events in this value are completely sealed.

    Some importers want to take complex fields out of the TraceData and add
    them to the model, changing them subtly as they do so. If the TraceData
    was constructed with data that is shared with something outside the trace
    data, for instance a test harness, then this mutation is unexpected. But,
    if the values are sealed, then mutating the events is a lot faster.

    We know if events are sealed if the value came from a string, or if the
    value came from a TraceDataBuilder.
    """
    return self._events_are_safely_mutable

  @property
  def active_parts(self):
    """The set of parts from ALL_TRACE_PARTS that have an entry in this data."""
    return {p for p in ALL_TRACE_PARTS if p.raw_field_name in self._raw_data}

  def HasTracesFor(self, part):
    """Returns True if at least one trace is stored for |part|."""
    return _HasTraceFor(part, self._raw_data)

  def GetTracesFor(self, part):
    """ Return the list of traces for |part| in string or dictionary forms.

    Note: since this API returns the traces that can be directly accessed in
    memory, it may require lots of memory usage as some of the traces can be
    very big.
    For references, we have cases where Telemetry is OOM'ed because the memory
    required for processing the trace in Python is too big (crbug.com/672097).

    Returns:
      The stored list of traces for |part| (an empty list if there are none).
    """
    assert isinstance(part, TraceDataPart)
    if not self.HasTracesFor(part):
      return []
    traces_list = self._raw_data[part.raw_field_name]
    # Since this API returns the traces in memory form, and since the memory
    # bottleneck of Telemetry is for keeping trace in memory, there is no use
    # in keeping the on-disk form of tracing beyond this point. Hence we convert
    # all traces for part of form TraceFileHandle to the JSON form.
    for i, data in enumerate(traces_list):
      if isinstance(data, TraceFileHandle):
        traces_list[i] = data.AsTraceData()
    return traces_list

  def GetTraceFor(self, part):
    """Returns the single trace stored for |part|.

    Asserts that exactly one trace is stored for |part|.
    """
    assert isinstance(part, TraceDataPart)
    traces = self.GetTracesFor(part)
    assert len(traces) == 1
    return traces[0]

  def CleanUpAllTraces(self):
    """ Remove all the traces that this has handles to.

    Those include traces stored in memory & on disk. After invoking this,
    one can no longer use this object for collecting the traces.
    """
    # .values() (rather than the Python 2 only .itervalues()) works on both
    # Python 2 and Python 3.
    for traces_list in self._raw_data.values():
      for trace in traces_list:
        if isinstance(trace, TraceFileHandle):
          trace.Clean()
    self._raw_data = {}

  def Serialize(self, file_path, trace_title=''):
    """Serializes the trace result to |file_path|.

    Every trace is made available as a file (in-memory traces are written to a
    temporary directory, TraceFileHandle traces reuse their backing file) and
    the trace2html script is run over those files. Does nothing but log a
    warning if there are no traces.

    Args:
      file_path: value passed to trace2html as --output.
      trace_title: value passed to trace2html as --title.

    Raises:
      subprocess.CalledProcessError: if trace2html exits with non-zero status.
    """
    if not self._raw_data:
      logging.warning('No traces to convert to html.')
      return
    temp_dir = tempfile.mkdtemp()
    trace_files = []
    try:
      trace_size_data = {}
      for part, traces_list in self._raw_data.items():
        for trace in traces_list:
          path = _GetFilePathForTrace(trace, temp_dir)
          trace_size_data.setdefault(part, 0)
          trace_size_data[part] += os.path.getsize(path)
          trace_files.append(path)
      logging.info('Trace sizes in bytes: %s', trace_size_data)

      cmd = (['python', _TRACE2HTML_PATH] + trace_files +
             ['--output', file_path] + ['--title', trace_title])
      subprocess.check_output(cmd)
    finally:
      # Only the files created for in-memory traces live in |temp_dir|.
      shutil.rmtree(temp_dir)


class TraceFileHandle(object):
  """A trace file handle object allows storing trace data on disk.

  TraceFileHandle API allows one to collect traces from Chrome into disk instead
  of keeping them in memory. This is important for keeping memory usage of
  Telemetry low to avoid OOM (see:
  https://github.com/catapult-project/catapult/issues/3119).

  The fact that this uses a file underneath to store tracing data means the
  callsite is responsible for discarding the file when they no longer need the
  tracing data. Call TraceFileHandle.Clean when you are done using this object.
  """
  def __init__(self):
    self._backing_file = None
    self._file_path = None
    self._trace_data = None

  def Open(self):
    """Creates the backing temporary file, opened for appending."""
    assert not self._backing_file and not self._file_path
    self._backing_file = tempfile.NamedTemporaryFile(delete=False, mode='a')

  def AppendTraceData(self, partial_trace_data):
    """Appends the string |partial_trace_data| to the backing file."""
    assert isinstance(partial_trace_data, _STRING_TYPES)
    assert self._backing_file, 'Open() must be called before appending data.'
    self._backing_file.write(partial_trace_data)

  @property
  def file_path(self):
    """Path of the backing file; only available between Close() and Clean()."""
    assert self._file_path, (
        'Either the handle need to be closed first or this handle is cleaned')
    return self._file_path

  def Close(self):
    """Closes the backing file and makes its path available as file_path."""
    assert self._backing_file
    self._backing_file.close()
    self._file_path = self._backing_file.name
    self._backing_file = None

  def AsTraceData(self):
    """Get the object form of trace data that this handle manages.

    *Warning: this can have large memory footprint if the trace data is big.

    Since this requires the in-memory form of the trace, it is no longer useful
    to still keep the backing file underneath, invoking this will also discard
    the file to avoid the risk of leaking the backing trace file.
    """
    # Compare against None rather than relying on truthiness: an empty trace
    # ([] or {}) is a valid cached value, and the backing file is already gone
    # after the first call.
    if self._trace_data is not None:
      return self._trace_data
    assert self._file_path
    with open(self._file_path) as f:
      self._trace_data = json.load(f)
    self.Clean()
    return self._trace_data

  def Clean(self):
    """Remove the backing file used for storing trace on disk.

    This should be called when and only when you no longer need to use
    TraceFileHandle.
    """
    assert self._file_path
    os.remove(self._file_path)
    self._file_path = None


class TraceDataBuilder(object):
  """TraceDataBuilder helps build up a trace from multiple trace agents.

  TraceData is supposed to be immutable, but it is useful during recording to
  have a mutable version. That is TraceDataBuilder.
  """
  def __init__(self):
    # Maps raw field name -> list of traces. Set to None once AsData() has
    # handed the data over to a TraceData.
    self._raw_data = {}

  def AsData(self):
    """Returns a TraceData owning the collected traces. Can be called once.

    Raises:
      Exception: if AsData() was already called on this builder.
    """
    if self._raw_data is None:
      raise Exception('Can only AsData once')
    data = TraceData()
    data._SetFromBuilder(self._raw_data)
    self._raw_data = None
    return data

  def AddTraceFor(self, part, trace):
    """Appends |trace| to the traces recorded for |part|.

    Args:
      part: a TraceDataPart.
      trace: for CHROME_TRACE_PART a dict, list or TraceFileHandle; for any
          other part a string, dict or list.

    Raises:
      Exception: if AsData() was already called on this builder.
    """
    assert isinstance(part, TraceDataPart), part
    if self._raw_data is None:
      raise Exception('Already called AsData() on this builder.')

    if part == CHROME_TRACE_PART:
      assert isinstance(trace, (dict, list, TraceFileHandle))
    else:
      assert isinstance(trace, _STRING_TYPES + (dict, list))

    self._raw_data.setdefault(part.raw_field_name, []).append(trace)

  def HasTracesFor(self, part):
    """Returns True if at least one trace was added for |part|."""
    return _HasTraceFor(part, self._raw_data)


def CreateTraceDataFromRawData(raw_data):
  """Convenient method for creating a TraceData object from |raw_data|.
  This is mostly used for testing.

  Args:
    raw_data can be:
      + A dictionary that represents multiple trace parts. Keys of the
        dictionary must always contain 'traceEvents', as chrome trace
        must always be present.
      + A list that represents Chrome trace events.
      + JSON string of either above.

  Returns:
    A TraceData. It is empty when |raw_data| is empty.

  Raises:
    NonSerializableTraceData: if the (decoded) data is neither a dict nor a
        list.
  """
  # Work on a private copy so that the caller's object is never mutated.
  raw_data = copy.deepcopy(raw_data)
  if isinstance(raw_data, _STRING_TYPES):
    json_data = json.loads(raw_data)
  else:
    json_data = raw_data

  b = TraceDataBuilder()
  if not json_data:
    return b.AsData()
  if isinstance(json_data, dict):
    assert 'traceEvents' in json_data, 'Only raw chrome trace is supported'
    trace_parts_keys = []
    for k in json_data:
      if k != 'traceEvents' and k in ALL_TRACE_PARTS_RAW_NAMES:
        trace_parts_keys.append(k)
        b.AddTraceFor(TraceDataPart(k), json_data[k])
    # Delete the data for extra keys to form trace data for Chrome part only.
    for k in trace_parts_keys:
      del json_data[k]
    b.AddTraceFor(CHROME_TRACE_PART, json_data)
  elif isinstance(json_data, list):
    b.AddTraceFor(CHROME_TRACE_PART, {'traceEvents': json_data})
  else:
    raise NonSerializableTraceData('Unrecognized data format.')
  return b.AsData()