# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Containers for trace data gathered from one or more tracing agents.

TraceDataBuilder accumulates traces per part, TraceData exposes them (and can
serialize them to HTML through trace2html), and TraceFileHandle keeps a trace
on disk until its in-memory form is requested.
"""

import copy
import json
import logging
import os
import shutil
import subprocess
import tempfile
import time

try:
  # Python 2: matches both str and unicode.
  _STRING_TYPES = basestring  # pylint: disable=undefined-variable
except NameError:
  # Python 3: basestring no longer exists, str is the only text type.
  _STRING_TYPES = str


_TRACING_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            os.path.pardir, os.path.pardir)
_TRACE2HTML_PATH = os.path.join(_TRACING_DIR, 'bin', 'trace2html')


class NonSerializableTraceData(Exception):
  """Raised when raw trace data cannot be serialized to TraceData."""


class TraceDataPart(object):
  """TraceData can have a variety of events.

  These are called "parts" and are accessed by the following fixed field names.
  Two parts are equal (and hash alike) when their raw field names are equal.
  """
  def __init__(self, raw_field_name):
    self._raw_field_name = raw_field_name

  def __repr__(self):
    return 'TraceDataPart("%s")' % self._raw_field_name

  @property
  def raw_field_name(self):
    return self._raw_field_name

  def __eq__(self, other):
    # Let Python try the reflected comparison instead of raising
    # AttributeError when |other| is not a trace part.
    if not isinstance(other, TraceDataPart):
      return NotImplemented
    return self.raw_field_name == other.raw_field_name

  def __ne__(self, other):
    # Python 2 does not derive __ne__ from __eq__, so define it explicitly.
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    return hash(self.raw_field_name)


ANDROID_PROCESS_DATA_PART = TraceDataPart('androidProcessDump')
ATRACE_PART = TraceDataPart('systemTraceEvents')
ATRACE_PROCESS_DUMP_PART = TraceDataPart('atraceProcessDump')
CHROME_TRACE_PART = TraceDataPart('traceEvents')
CPU_TRACE_DATA = TraceDataPart('cpuSnapshots')
INSPECTOR_TRACE_PART = TraceDataPart('inspectorTimelineEvents')
TELEMETRY_PART = TraceDataPart('telemetry')
WALT_TRACE_PART = TraceDataPart('waltTraceEvents')

# NOTE(review): WALT_TRACE_PART is not listed here, so it is never reported by
# TraceData.active_parts nor split out by CreateTraceDataFromRawData. Confirm
# whether that omission is intentional before adding it.
ALL_TRACE_PARTS = {ANDROID_PROCESS_DATA_PART,
                   ATRACE_PART,
                   ATRACE_PROCESS_DUMP_PART,
                   CHROME_TRACE_PART,
                   CPU_TRACE_DATA,
                   INSPECTOR_TRACE_PART,
                   TELEMETRY_PART}

ALL_TRACE_PARTS_RAW_NAMES = {p.raw_field_name for p in ALL_TRACE_PARTS}


def _HasTraceFor(part, raw):
  """Returns True if |raw| holds at least one trace for |part|.

  Args:
    part: a TraceDataPart.
    raw: dictionary mapping raw field names to lists of traces.
  """
  assert isinstance(part, TraceDataPart)
  name = part.raw_field_name
  return name in raw and len(raw[name]) > 0


def _GetFilePathForTrace(trace, dir_path):
  """ Return path to a file that contains |trace|.

  Note: if |trace| is an instance of TraceFileHandle, this reuses the trace path
  that the trace file handle holds. Otherwise, it creates a new trace file
  in |dir_path| directory.

  Raises:
    TypeError: if |trace| is not a TraceFileHandle, string, dict or list.
  """
  if isinstance(trace, TraceFileHandle):
    return trace.file_path
  is_text = isinstance(trace, _STRING_TYPES)
  # Validate before creating the file so that an unsupported trace does not
  # leave an empty temporary file behind.
  if not is_text and not isinstance(trace, (dict, list)):
    raise TypeError('Trace is of unknown type.')
  with tempfile.NamedTemporaryFile(mode='w', dir=dir_path, delete=False) as fp:
    if is_text:
      fp.write(trace)
    else:
      json.dump(trace, fp)
    return fp.name


class TraceData(object):
  """ TraceData holds a collection of traces from multiple sources.

  A TraceData can have multiple active parts. Each part represents traces
  collected from a different trace agent.
  """
  def __init__(self):
    """Creates an empty TraceData; TraceDataBuilder.AsData() fills it in."""
    self._raw_data = {}
    self._events_are_safely_mutable = False

  def _SetFromBuilder(self, d):
    # The builder gives up its reference to |d| after calling this, so the
    # events are owned by this object from now on.
    self._raw_data = d
    self._events_are_safely_mutable = True

  @property
  def events_are_safely_mutable(self):
    """Returns true if the events in this value are completely sealed.

    Some importers want to take complex fields out of the TraceData and add
    them to the model, changing them subtly as they do so. If the TraceData
    was constructed with data that is shared with something outside the trace
    data, for instance a test harness, then this mutation is unexpected. But,
    if the values are sealed, then mutating the events is a lot faster.

    In this file the flag is only set when the data is handed over by a
    TraceDataBuilder.
    """
    return self._events_are_safely_mutable

  @property
  def active_parts(self):
    """The set of known parts (ALL_TRACE_PARTS) that have an entry here."""
    return {p for p in ALL_TRACE_PARTS if p.raw_field_name in self._raw_data}

  def HasTracesFor(self, part):
    """Returns True if there is at least one trace stored for |part|."""
    return _HasTraceFor(part, self._raw_data)

  def GetTracesFor(self, part):
    """ Return the list of traces for |part| in string or dictionary forms.

    Note: since this API returns the traces that can be directly accessed in
    memory, it may require lots of memory usage as some of the traces can be
    very big.
    For references, we have cases where Telemetry is OOM'ed because the memory
    required for processing the trace in Python is too big (crbug.com/672097).

    Returns:
      The stored list of traces for |part| (not a copy), or a new empty list
      if there are none.
    """
    assert isinstance(part, TraceDataPart)
    if not self.HasTracesFor(part):
      return []
    traces_list = self._raw_data[part.raw_field_name]
    # Since this API returns the traces in memory form, and since the memory
    # bottleneck of Telemetry is for keeping trace in memory, there is no use
    # in keeping the on-disk form of tracing beyond this point. Hence we
    # convert all traces of form TraceFileHandle to the JSON form, in place.
    for i, data in enumerate(traces_list):
      if isinstance(data, TraceFileHandle):
        traces_list[i] = data.AsTraceData()
    return traces_list

  def GetTraceFor(self, part):
    """Returns the single trace stored for |part|.

    Asserts that exactly one trace is stored for |part|.
    """
    assert isinstance(part, TraceDataPart)
    traces = self.GetTracesFor(part)
    assert len(traces) == 1
    return traces[0]

  def CleanUpAllTraces(self):
    """ Remove all the traces that this has handles to.

    Those include traces stored in memory & on disk. After invoking this,
    one can no longer use this object for collecting the traces.
    """
    # .values() (rather than the Python 2 only .itervalues()) works on both
    # Python 2 and Python 3.
    for traces_list in self._raw_data.values():
      for trace in traces_list:
        if isinstance(trace, TraceFileHandle):
          trace.Clean()
    self._raw_data = {}

  def Serialize(self, file_path, trace_title=''):
    """Serializes the trace result to |file_path|.

    Every trace is made available as a file (in-memory traces are written to
    a temporary directory that is removed afterwards) and the files are
    passed to the trace2html script.

    Args:
      file_path: value passed to trace2html as --output.
      trace_title: value passed to trace2html as --title.

    Raises:
      subprocess.CalledProcessError: if trace2html exits with a non-zero code.
      TypeError: if a stored trace has an unsupported type.
    """
    if not self._raw_data:
      logging.warning('No traces to convert to html.')
      return
    temp_dir = tempfile.mkdtemp()
    trace_files = []
    try:
      trace_size_data = {}
      # .items() (rather than the Python 2 only .iteritems()) works on both
      # Python 2 and Python 3.
      for part_name, traces_list in self._raw_data.items():
        for trace in traces_list:
          path = _GetFilePathForTrace(trace, temp_dir)
          trace_size_data.setdefault(part_name, 0)
          trace_size_data[part_name] += os.path.getsize(path)
          trace_files.append(path)
      logging.info('Trace sizes in bytes: %s', trace_size_data)

      start_time = time.time()
      cmd = (
          ['python', _TRACE2HTML_PATH] + trace_files +
          ['--output', file_path] + ['--title', trace_title])
      subprocess.check_output(cmd)

      elapsed_time = time.time() - start_time
      logging.info('trace2html finished in %.02f seconds.', elapsed_time)
    finally:
      # Only the temporary copies live in |temp_dir|; files owned by a
      # TraceFileHandle are stored elsewhere and are left untouched.
      shutil.rmtree(temp_dir)


class TraceFileHandle(object):
  """A trace file handle object allows storing trace data on disk.

  TraceFileHandle API allows one to collect traces from Chrome into disk instead
  of keeping them in memory. This is important for keeping memory usage of
  Telemetry low to avoid OOM (see:
  https://github.com/catapult-project/catapult/issues/3119).

  The fact that this uses a file underneath to store tracing data means the
  callsite is responsible for discarding the file when they no longer need the
  tracing data. Call TraceFileHandle.Clean when you are done using this object.

  Expected call order: Open(), AppendTraceData() any number of times, Close(),
  then either AsTraceData() or Clean().
  """
  def __init__(self):
    self._backing_file = None
    self._file_path = None
    self._trace_data = None

  def Open(self):
    """Creates the backing temporary file and opens it for appending."""
    assert not self._backing_file and not self._file_path
    self._backing_file = tempfile.NamedTemporaryFile(delete=False, mode='a')

  def AppendTraceData(self, partial_trace_data):
    """Appends the string |partial_trace_data| to the backing file."""
    assert isinstance(partial_trace_data, _STRING_TYPES)
    self._backing_file.write(partial_trace_data)

  @property
  def file_path(self):
    """Path of the backing file; only available between Close() and Clean()."""
    assert self._file_path, (
        'Either the handle need to be closed first or this handle is cleaned')
    return self._file_path

  def Close(self):
    """Closes the backing file and makes its path available as file_path."""
    assert self._backing_file
    self._backing_file.close()
    self._file_path = self._backing_file.name
    self._backing_file = None

  def AsTraceData(self):
    """Get the object form of trace data that this handle manages.

    *Warning: this can have large memory footprint if the trace data is big.

    Since this requires the in-memory form of the trace, it is no longer useful
    to still keep the backing file underneath, invoking this will also discard
    the file to avoid the risk of leaking the backing trace file.

    Returns:
      The JSON-decoded content of the backing file. The value is cached, so
      later calls return the same object.
    """
    # Compare against None rather than relying on truthiness: an empty trace
    # ([] or {}) is a valid cached value, and the backing file is already gone
    # by the time of a second call.
    if self._trace_data is not None:
      return self._trace_data
    assert self._file_path
    with open(self._file_path) as f:
      self._trace_data = json.load(f)
    self.Clean()
    return self._trace_data

  def Clean(self):
    """Remove the backing file used for storing trace on disk.

    This should be called when and only when you no longer need to use
    TraceFileHandle.
    """
    assert self._file_path
    os.remove(self._file_path)
    self._file_path = None


class TraceDataBuilder(object):
  """TraceDataBuilder helps build up a trace from multiple trace agents.

  TraceData is supposed to be immutable, but it is useful during recording to
  have a mutable version. That is TraceDataBuilder.
  """
  def __init__(self):
    self._raw_data = {}

  def AsData(self):
    """Returns a TraceData owning everything added so far.

    The builder hands over its data and cannot be used afterwards.

    Raises:
      Exception: if AsData() was already called on this builder.
    """
    if self._raw_data is None:
      raise Exception('Can only AsData once')
    data = TraceData()
    data._SetFromBuilder(self._raw_data)
    self._raw_data = None
    return data

  def AddTraceFor(self, part, trace):
    """Appends |trace| to the list of traces recorded for |part|.

    Args:
      part: a TraceDataPart.
      trace: for CHROME_TRACE_PART a dict, list or TraceFileHandle; for any
          other part a string, dict or list.

    Raises:
      Exception: if AsData() was already called on this builder.
    """
    assert isinstance(part, TraceDataPart), part
    if part == CHROME_TRACE_PART:
      assert isinstance(trace, (dict, list, TraceFileHandle))
    else:
      assert isinstance(trace, (_STRING_TYPES, dict, list))

    if self._raw_data is None:
      raise Exception('Already called AsData() on this builder.')

    self._raw_data.setdefault(part.raw_field_name, []).append(trace)

  def HasTracesFor(self, part):
    """Returns True if at least one trace was added for |part|."""
    return _HasTraceFor(part, self._raw_data)


def CreateTraceDataFromRawData(raw_data):
  """Convenient method for creating a TraceData object from |raw_data|.
  This is mostly used for testing.

  Args:
    raw_data can be:
      + A dictionary that represents multiple trace parts. Keys of the
        dictionary must always contain 'traceEvents', as chrome trace
        must always be present.
      + A list that represents Chrome trace events.
      + JSON string of either above.

  Returns:
    A TraceData. It is empty when the (decoded) input is empty.

  Raises:
    NonSerializableTraceData: if the decoded data is neither a dict nor a list.
  """
  # Work on a copy so that the caller's object is never mutated.
  raw_data = copy.deepcopy(raw_data)
  if isinstance(raw_data, _STRING_TYPES):
    json_data = json.loads(raw_data)
  else:
    json_data = raw_data

  b = TraceDataBuilder()
  if not json_data:
    return b.AsData()
  if isinstance(json_data, dict):
    assert 'traceEvents' in json_data, 'Only raw chrome trace is supported'
    extra_part_names = [
        k for k in json_data
        if k != 'traceEvents' and k in ALL_TRACE_PARTS_RAW_NAMES]
    # Move the data of every other known part out of the dictionary so that
    # what remains forms the trace data for the Chrome part only.
    for name in extra_part_names:
      b.AddTraceFor(TraceDataPart(name), json_data.pop(name))
    b.AddTraceFor(CHROME_TRACE_PART, json_data)
  elif isinstance(json_data, list):
    b.AddTraceFor(CHROME_TRACE_PART, {'traceEvents': json_data})
  else:
    raise NonSerializableTraceData('Unrecognized data format.')
  return b.AsData()