# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import atexit
import json
import os
import sys
import time
import threading
import multiprocessing
import multiprocessing_shim

from py_trace_event.trace_event_impl import perfetto_trace_writer
from py_trace_event import trace_time

from py_utils import lock


# Trace file formats:

# Legacy format: json list of events.
# Events can be written from multiple processes, but since no process
# can be sure that it is the last one, nobody writes the closing ']'.
# So the resulting file is not technically correct json.
JSON = "json"

# Full json with events and metadata.
# This format produces correct json ready to feed into TraceDataBuilder.
# Note that it is the responsibility of the user of py_trace_event to make sure
# that trace_disable() is called after all child processes have finished.
JSON_WITH_METADATA = "json_with_metadata"

# Perfetto protobuf trace format.
PROTOBUF = "protobuf"


_lock = threading.Lock()

_enabled = False
_log_file = None

_cur_events = []  # buffered events that have yet to be written out
_benchmark_metadata = {}

# Default timestamp values for clock snapshot.
# If a ClockSnapshot message with these default values is emitted, Telemetry
# events' time will not be translated by trace processor, because both
# TELEMETRY and BOOTTIME timestamps are the same. This allows the old-style
# synchronization (using clock_sync events) to take place in catapult.
# If we want to actually synchronize Telemetry with other trace producers
# via clock snapshots in trace processor, we should set _boottime_ts to the
# actual BOOTTIME of the device and _emit_clock_sync to False. In this case,
# trace processor will translate both Chrome's and Telemetry's timestamps
# to the device time (BOOTTIME) during proto-to-json conversion, and catapult's
# clock synchronization will not take place because we do not emit the
# clock_sync event.
# Note that we can't use both synchronization methods at the same time
# because that will lead to wrong results.
_telemetry_ts = trace_time.Now()
_boottime_ts = _telemetry_ts
_emit_clock_sync = True

_tls = threading.local()  # tls used to detect forking/etc
_atexit_regsitered_for_pid = None

_control_allowed = True

_original_multiprocessing_process = multiprocessing.Process

class TraceException(Exception):
  pass

def _note(msg, *args):
  pass
#  print "%i: %s" % (os.getpid(), msg)


def _locked(fn):
  def locked_fn(*args, **kwargs):
    _lock.acquire()
    try:
      ret = fn(*args, **kwargs)
    finally:
      _lock.release()
    return ret
  return locked_fn

def _disallow_tracing_control():
  global _control_allowed
  _control_allowed = False

def trace_enable(log_file=None, format=None):
  """Enable tracing.

  Args:
    log_file: file to write the trace into. Can be a file-like object,
      the name of a file, or None. If None, the file name is constructed
      from the executable name.
    format: trace file format. See trace_event.py for available options.
  """
  global _emit_clock_sync
  if format is None:
    format = JSON
  # Clock snapshots can only be written in protobuf format. All other formats
  # should use clock_sync events instead.
  if format != PROTOBUF:
    _emit_clock_sync = True
  _trace_enable(log_file, format)
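
# Illustrative usage sketch (comment only, never executed). The file name,
# format choice, and event name are placeholders; callers typically reach
# these functions through trace_event.py, referenced in the docstring above.
#
#   trace_enable("example_trace.json", format=JSON_WITH_METADATA)
#   trace_begin("interesting_work")
#   ...                               # traced work goes here
#   trace_end("interesting_work")
#   trace_disable()                   # flush buffered events, write the footer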

def _write_header():
  if _format == PROTOBUF:
    tid = threading.current_thread().ident
    perfetto_trace_writer.write_clock_snapshot(
        output=_log_file,
        tid=tid,
        telemetry_ts=_telemetry_ts,
        boottime_ts=_boottime_ts,
    )
    perfetto_trace_writer.write_thread_descriptor_event(
        output=_log_file,
        pid=os.getpid(),
        tid=tid,
        ts=trace_time.Now(),
    )
    perfetto_trace_writer.write_event(
        output=_log_file,
        ph="M",
        category="process_argv",
        name="process_argv",
        ts=trace_time.Now(),
        args={"argv": sys.argv},
        tid=tid,
    )
  else:
    if _format == JSON:
      _log_file.write('[')
    elif _format == JSON_WITH_METADATA:
      _log_file.write('{"traceEvents": [\n')
    else:
      raise TraceException("Unknown format: %s" % _format)
    json.dump({
        "ph": "M",
        "category": "process_argv",
        "pid": os.getpid(),
        "tid": threading.current_thread().ident,
        "ts": trace_time.Now(),
        "name": "process_argv",
        "args": {"argv": sys.argv},
    }, _log_file)
    _log_file.write('\n')


@_locked
def _trace_enable(log_file=None, format=None):
  global _format
  _format = format
  global _enabled
  if _enabled:
    raise TraceException("Already enabled")
  if not _control_allowed:
    raise TraceException("Tracing control not allowed in child processes.")
  _enabled = True
  global _log_file
  if log_file is None:
    if sys.argv[0] == '':
      n = 'trace_event'
    else:
      n = sys.argv[0]
    if _format == PROTOBUF:
      log_file = open("%s.pb" % n, "ab", False)
    else:
      log_file = open("%s.json" % n, "ab", False)
  elif isinstance(log_file, basestring):
    log_file = open("%s" % log_file, "ab", False)
  elif not hasattr(log_file, 'fileno'):
    raise TraceException(
        "Log file must be None, a string, or file-like object with a fileno()")

  _note("trace_event: tracelog name is %s" % log_file)

  _log_file = log_file
  with lock.FileLock(_log_file, lock.LOCK_EX):
    _log_file.seek(0, os.SEEK_END)

    lastpos = _log_file.tell()
    creator = lastpos == 0
    if creator:
      _note("trace_event: Opened new tracelog, lastpos=%i", lastpos)
      _write_header()
    else:
      _note("trace_event: Opened existing tracelog")
    _log_file.flush()
  # Monkeypatch in our process replacement for the multiprocessing.Process
  # class.
  if multiprocessing.Process != multiprocessing_shim.ProcessShim:
    multiprocessing.Process = multiprocessing_shim.ProcessShim

@_locked
def trace_flush():
  if _enabled:
    _flush()

@_locked
def trace_disable():
  global _enabled
  if not _control_allowed:
    raise TraceException("Tracing control not allowed in child processes.")
  if not _enabled:
    return
  _enabled = False
  _flush(close=True)
  # Clear the collected interned data so that the next trace session
  # could start from a clean state.
  perfetto_trace_writer.reset_global_state()
  multiprocessing.Process = _original_multiprocessing_process
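
# Flush/disable lifecycle sketch (comment only; names are placeholders).
# trace_flush() writes buffered events and keeps the log open, while
# trace_disable() also writes the footer and closes the file. Since
# multiprocessing.Process is monkeypatched while tracing is enabled, child
# processes started in that window go through multiprocessing_shim; per the
# format notes at the top of this file, join them before disabling.
#
#   trace_enable("example_trace.json", format=JSON_WITH_METADATA)
#   worker = multiprocessing.Process(target=traced_child_function)
#   worker.start()
#   trace_flush()     # push events buffered so far to the log file
#   worker.join()     # children must finish before the footer is written
#   trace_disable()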

def _write_cur_events():
  if _format == PROTOBUF:
    for e in _cur_events:
      perfetto_trace_writer.write_event(
          output=_log_file,
          ph=e["ph"],
          category=e["category"],
          name=e["name"],
          ts=e["ts"],
          args=e["args"],
          tid=threading.current_thread().ident,
      )
  elif _format in (JSON, JSON_WITH_METADATA):
    for e in _cur_events:
      _log_file.write(",\n")
      json.dump(e, _log_file)
  else:
    raise TraceException("Unknown format: %s" % _format)
  del _cur_events[:]

def _write_footer():
  if _format in [JSON, PROTOBUF]:
    # In JSON format we might not be the only process writing to this logfile.
    # So, we will simply close the file rather than writing the trailing ] that
    # it technically requires. The trace viewer understands this and
    # will insert a trailing ] during loading.
    # In PROTOBUF format there's no need for a footer. The metadata has already
    # been written in a special proto message.
    pass
  elif _format == JSON_WITH_METADATA:
    _log_file.write('],\n"metadata": ')
    json.dump(_benchmark_metadata, _log_file)
    _log_file.write('}')
  else:
    raise TraceException("Unknown format: %s" % _format)

def _flush(close=False):
  global _log_file
  with lock.FileLock(_log_file, lock.LOCK_EX):
    _log_file.seek(0, os.SEEK_END)
    if len(_cur_events):
      _write_cur_events()
    if close:
      _write_footer()
    _log_file.flush()

  if close:
    _note("trace_event: Closed")
    _log_file.close()
    _log_file = None
  else:
    _note("trace_event: Flushed")

@_locked
def trace_is_enabled():
  return _enabled

@_locked
def add_trace_event(ph, ts, category, name, args=None):
  global _enabled
  if not _enabled:
    return
  if not hasattr(_tls, 'pid') or _tls.pid != os.getpid():
    _tls.pid = os.getpid()
    global _atexit_regsitered_for_pid
    if _tls.pid != _atexit_regsitered_for_pid:
      _atexit_regsitered_for_pid = _tls.pid
      atexit.register(_trace_disable_atexit)
      _tls.pid = os.getpid()
      del _cur_events[:]  # we forked, clear the event buffer!
    tid = threading.current_thread().ident
    if not tid:
      tid = os.getpid()
    _tls.tid = tid

  _cur_events.append({
      "ph": ph,
      "category": category,
      "pid": _tls.pid,
      "tid": _tls.tid,
      "ts": ts,
      "name": name,
      "args": args or {},
  })

def trace_begin(name, args=None):
  add_trace_event("B", trace_time.Now(), "python", name, args)

def trace_end(name, args=None):
  add_trace_event("E", trace_time.Now(), "python", name, args)

def trace_set_thread_name(thread_name):
  add_trace_event("M", trace_time.Now(), "__metadata", "thread_name",
                  {"name": thread_name})
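
# Event-emission sketch (comment only; thread and event names are
# placeholders). A trace_begin()/trace_end() pair with the same name delimits
# a slice in the "python" category, and trace_set_thread_name() emits a
# "__metadata" event so viewers can label the calling thread.
#
#   def _traced_worker():
#     trace_set_thread_name("example_worker")
#     trace_begin("load_page")
#     ...                       # traced work goes here
#     trace_end("load_page")
#
#   t = threading.Thread(target=_traced_worker)
#   t.start()
#   t.join()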

def trace_add_benchmark_metadata(
    benchmark_start_time_us,
    story_run_time_us,
    benchmark_name,
    benchmark_description,
    story_name,
    story_tags,
    story_run_index,
    label=None,
):
  """Add benchmark metadata to be written to the trace file.

  Args:
    benchmark_start_time_us: Benchmark start time in microseconds.
    story_run_time_us: Story start time in microseconds.
    benchmark_name: Name of the benchmark.
    benchmark_description: Description of the benchmark.
    story_name: Name of the story.
    story_tags: List of story tags.
    story_run_index: Index of the story run.
    label: Optional label.
  """
  global _benchmark_metadata
  if _format == PROTOBUF:
    # Write metadata immediately.
    perfetto_trace_writer.write_metadata(
        output=_log_file,
        benchmark_start_time_us=benchmark_start_time_us,
        story_run_time_us=story_run_time_us,
        benchmark_name=benchmark_name,
        benchmark_description=benchmark_description,
        story_name=story_name,
        story_tags=story_tags,
        story_run_index=story_run_index,
        label=label,
    )
    if _emit_clock_sync:
      perfetto_trace_writer.write_chrome_metadata(
          output=_log_file,
          clock_domain="TELEMETRY",
      )
  elif _format == JSON_WITH_METADATA:
    # Store metadata to write it in the footer.
    telemetry_metadata_for_json = {
        "benchmarkStart": benchmark_start_time_us / 1000.0,
        "traceStart": story_run_time_us / 1000.0,
        "benchmarks": [benchmark_name],
        "benchmarkDescriptions": [benchmark_description],
        "stories": [story_name],
        "storyTags": story_tags,
        "storysetRepeats": [story_run_index],
    }
    if label:
      telemetry_metadata_for_json["labels"] = [label]

    assert _emit_clock_sync
    _benchmark_metadata = {
        # TODO(crbug.com/948633): For right now, we use "TELEMETRY" as the
        # clock domain to guarantee that Telemetry is given its own clock
        # domain. Telemetry isn't really a clock domain, though: it's a
        # system that USES a clock domain like LINUX_CLOCK_MONOTONIC or
        # WIN_QPC. However, there's a chance that a Telemetry controller
        # running on Linux (using LINUX_CLOCK_MONOTONIC) is interacting
        # with an Android phone (also using LINUX_CLOCK_MONOTONIC, but
        # on a different machine). The current logic collapses clock
        # domains based solely on the clock domain string, but we really
        # should collapse based on some (device ID, clock domain ID)
        # tuple. Giving Telemetry its own clock domain is a work-around
        # for this.
        "clock-domain": "TELEMETRY",
        "telemetry": telemetry_metadata_for_json,
    }
  elif _format == JSON:
    raise TraceException("Can't write metadata in JSON format")
  else:
    raise TraceException("Unknown format: %s" % _format)
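
# Metadata sketch (comment only; all values are placeholders). In PROTOBUF
# format the metadata is written to the log immediately; in JSON_WITH_METADATA
# it is kept in _benchmark_metadata until trace_disable() writes the footer;
# plain JSON raises TraceException.
#
#   trace_add_benchmark_metadata(
#       benchmark_start_time_us=1000000,
#       story_run_time_us=2000000,
#       benchmark_name="example_benchmark",
#       benchmark_description="An illustrative benchmark.",
#       story_name="example_story",
#       story_tags=["tag_a", "tag_b"],
#       story_run_index=0,
#   )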

def trace_set_clock_snapshot(telemetry_ts, boottime_ts):
  """Set timestamps to be written in a ClockSnapshot message.

  This function must be called before the trace is started. When the trace
  starts, a ClockSnapshot message with the given timestamps is written in
  protobuf format. In json format, nothing happens. Use the clock_sync
  function for clock synchronization in json format.

  Args:
    telemetry_ts: BOOTTIME of the device where Telemetry runs.
    boottime_ts: BOOTTIME of the device where the tested browser runs.
  """
  global _telemetry_ts
  global _boottime_ts
  global _emit_clock_sync
  global _enabled
  if _enabled:
    raise TraceException("Can't set the clock snapshot after trace started.")
  _telemetry_ts = telemetry_ts
  _boottime_ts = boottime_ts
  _emit_clock_sync = False

def clock_sync(sync_id, issue_ts=None):
  """Add a clock sync event to the trace log.

  Adds a clock sync event if the TBMv2-style synchronization is enabled.
  It's enabled in two cases:
    1) The trace format is json.
    2) The clock snapshot was not set before the trace started.

  Args:
    sync_id: ID of the clock sync event.
    issue_ts: Time at which the clock sync was issued, in microseconds.
  """
  if _emit_clock_sync:
    time_stamp = trace_time.Now()
    args_to_log = {"sync_id": sync_id}
    if issue_ts:  # Issuer if issue_ts is set, else receiver.
      assert issue_ts <= time_stamp
      args_to_log["issue_ts"] = issue_ts
    add_trace_event("c", time_stamp, "python", "clock_sync", args_to_log)

def _trace_disable_atexit():
  trace_disable()

def is_tracing_controllable():
  global _control_allowed
  return _control_allowed
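
# Clock-synchronization sketch (comment only; the sync id and boottime value
# are placeholders). With a protobuf trace, a real ClockSnapshot can be
# recorded by calling trace_set_clock_snapshot() before trace_enable(), which
# also suppresses clock_sync events. Otherwise (json, or no snapshot set),
# clock_sync() emits a catapult-style clock_sync event; the same sync_id is
# expected to appear in the other tracing agent's trace so the two can be
# aligned during import.
#
#   # Protobuf path: record a real ClockSnapshot (device_boottime_ts is a
#   # placeholder); this disables clock_sync emission.
#   trace_set_clock_snapshot(telemetry_ts=trace_time.Now(),
#                            boottime_ts=device_boottime_ts)
#   trace_enable("example_trace.pb", format=PROTOBUF)
#
#   # Default path (json, or no snapshot set before enabling): emit a
#   # clock_sync event instead.
#   clock_sync("example-sync-id", issue_ts=trace_time.Now())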