#!/usr/bin/python3
# Copyright 2019 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Runs chromium/src/run_benchmark for a given story and extracts the generated
# runtime call stats.
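#
# Example invocations (illustrative; the story name below is a placeholder
# and <this_script> stands for this file's path):
#
#   python3 <this_script> --repeats 3 --group browse:news:example
#   python3 <this_script> -d /tmp/runtime_call_stats_xyz browse:news:example
#
# The first form runs the story three times and groups the collected stats
# into buckets; the second reuses output already generated by a previous run
# instead of re-running the benchmark.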

import argparse
import csv
import json
import glob
import os
import pathlib
import re
import tabulate
import shutil
import statistics
import subprocess
import sys
import tempfile

from callstats_groups import RUNTIME_CALL_STATS_GROUPS


JSON_FILE_EXTENSION = ".pb_converted.json"


def parse_args():
  parser = argparse.ArgumentParser(
      description="Run story and collect runtime call stats.")
  parser.add_argument("story", metavar="story", nargs=1, help="story to run")
  parser.add_argument(
      "--group",
      dest="group",
      action="store_true",
      help="group common stats together into buckets")
  parser.add_argument(
      "-r",
      "--repeats",
      dest="repeats",
      metavar="N",
      action="store",
      type=int,
      default=1,
      help="number of times to run the story")
  parser.add_argument(
      "-v",
      "--verbose",
      dest="verbose",
      action="store_true",
      help="output benchmark runs to stdout")
  parser.add_argument(
      "--device",
      dest="device",
      action="store",
      help="device to run the test on. Passed directly to run_benchmark")
  parser.add_argument(
      "-d",
      "--dir",
      dest="dir",
      action="store",
      help=("directory to look for already generated output in. It must "
            "already exist and the benchmark will not be re-run"))
  parser.add_argument(
      "-f",
      "--format",
      dest="format",
      action="store",
      choices=["csv", "table"],
      help="output format (defaults to a formatted table)")
  parser.add_argument(
      "-o",
      "--output",
      metavar="FILE",
      dest="out_file",
      action="store",
      help="write table to FILE rather than stdout")
  parser.add_argument(
      "--browser",
      dest="browser",
      metavar="BROWSER_TYPE",
      action="store",
      default="release",
      help=("passed directly to the --browser option of run_benchmark. "
            "Ignored if -e/--executable is used"))
  parser.add_argument(
      "-e",
      "--executable",
      dest="executable",
      metavar="EXECUTABLE",
      action="store",
      help=("path to the browser executable to run. If not given, the "
            "--browser option is passed to run_benchmark instead"))
  parser.add_argument(
      "--chromium-dir",
      dest="chromium_dir",
      metavar="DIR",
      action="store",
      default=".",
      help=("path to the chromium directory. If not given, the script must "
            "be run inside the chromium/src directory"))
  parser.add_argument(
      "--js-flags", dest="js_flags", action="store", help="flags to pass to v8")
  parser.add_argument(
      "--extra-browser-args",
      dest="browser_args",
      action="store",
      help="flags to pass to chrome")
  parser.add_argument(
      "--benchmark",
      dest="benchmark",
      action="store",
      default="v8.browsing_desktop",
      help="benchmark to run")
  parser.add_argument(
      "--stdev",
      dest="stdev",
      action="store_true",
      help="add columns for the standard deviation")
  parser.add_argument(
      "--filter",
      dest="filter",
      action="append",
      help="usable with --group to only show the buckets specified by filter")
  parser.add_argument(
      "--retain",
      dest="retain",
      action="store",
      default="json",
      choices=["none", "json", "all"],
      help=("controls which artifacts are retained after the run: 'none' "
            "deletes all files, 'json' keeps only the converted JSON trace "
            "file for each run, and 'all' keeps all files"))

  return parser.parse_args()

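
# A rough sketch (illustrative, heavily abridged) of the converted trace that
# process_trace consumes: a top-level "traceEvents" list whose entries may
# carry a "runtime-call-stats" dict in their "args", mapping each counter
# name to a [count, duration] pair. The counter name and values below are
# made up.
#
#   {
#     "traceEvents": [
#       {"args": {"runtime-call-stats": {"JS_Execution": [23, 14327]}}},
#       ...
#     ]
#   }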
def process_trace(trace_file):
  text_string = pathlib.Path(trace_file).read_text()
  result = json.loads(text_string)

  output = {}
  result = result["traceEvents"]
  for o in result:
    o = o["args"]
    if "runtime-call-stats" in o:
      r = o["runtime-call-stats"]
      for name in r:
        count = r[name][0]
        duration = r[name][1]
        if name in output:
          output[name]["count"] += count
          output[name]["duration"] += duration
        else:
          output[name] = {"count": count, "duration": duration}

  return output


def run_benchmark(story,
                  repeats=1,
                  output_dir=".",
                  verbose=False,
                  js_flags=None,
                  browser_args=None,
                  chromium_dir=".",
                  executable=None,
                  benchmark="v8.browsing_desktop",
                  device=None,
                  browser="release"):

  orig_chromium_dir = chromium_dir
  xvfb = os.path.join(chromium_dir, "testing", "xvfb.py")
  if not os.path.isfile(xvfb):
    chromium_dir = os.path.join(chromium_dir, "src")
    xvfb = os.path.join(chromium_dir, "testing", "xvfb.py")
    if not os.path.isfile(xvfb):
      print("chromium_dir does not point to a valid chromium checkout: " +
            orig_chromium_dir)
      sys.exit(1)

  command = [
      xvfb,
      os.path.join(chromium_dir, "tools", "perf", "run_benchmark"),
      "run",
      "--story",
      story,
      "--pageset-repeat",
      str(repeats),
      "--output-dir",
      output_dir,
      "--intermediate-dir",
      os.path.join(output_dir, "artifacts"),
      benchmark,
  ]

  if executable:
    command += ["--browser-executable", executable]
  else:
    command += ["--browser", browser]

  if device:
    command += ["--device", device]
  if browser_args:
    command += ["--extra-browser-args", browser_args]
  if js_flags:
    command += ["--js-flags", js_flags]

  if not benchmark.startswith("v8."):
    # Most benchmarks don't collect runtime call stats by default, so enable
    # them manually.
    categories = [
        "v8",
        "disabled-by-default-v8.runtime_stats",
    ]

    command += ["--extra-chrome-categories", ",".join(categories)]

  print("Output directory: %s" % output_dir)
  stdout = ""
  print(f"Running: {' '.join(command)}\n")
  proc = subprocess.Popen(
      command,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      universal_newlines=True)
  proc.stderr.close()
  status_matcher = re.compile(r"\[ +(\w+) +\]")
  for line in iter(proc.stdout.readline, ""):
    stdout += line
    match = status_matcher.match(line)
    if verbose or match:
      print(line, end="")

  proc.stdout.close()

  if proc.wait() != 0:
    print("\nrun_benchmark failed:")
    # If verbose, everything has already been printed.
    if not verbose:
      print(stdout)
    sys.exit(1)

  print("\nrun_benchmark completed")


def write_output(f, table, headers, run_count, format="table"):
  if format == "csv":
    # Strip newlines from the CSV output.
    headers = [h.replace("\n", " ") for h in headers]
    writer = csv.writer(f)
    writer.writerow(headers)
    writer.writerows(table)
  else:
    # The first column is the name; subsequent columns alternate between
    # counts and durations for each run, followed by the summary columns.
    summary_count = len(headers) - 2 * run_count - 1
    floatfmt = ("",) + (".0f", ".2f") * run_count + (".2f",) * summary_count
    f.write(tabulate.tabulate(table, headers=headers, floatfmt=floatfmt))
    f.write("\n")


class Row:

  def __init__(self, name, run_count):
    self.name = name
    self.durations = [0] * run_count
    self.counts = [0] * run_count
    self.mean_duration = None
    self.mean_count = None
    self.stdev_duration = None
    self.stdev_count = None

  def __repr__(self):
    data_str = ", ".join(
        str((c, d)) for (c, d) in zip(self.counts, self.durations))
    return (f"{self.name}: {data_str}, mean_count: {self.mean_count}, " +
            f"mean_duration: {self.mean_duration}")

  def add_data(self, counts, durations):
    self.counts = counts
    self.durations = durations

  def add_data_point(self, run, count, duration):
    self.counts[run] = count
    self.durations[run] = duration

  def prepare(self, stdev=False):
    if len(self.durations) > 1:
      self.mean_duration = statistics.mean(self.durations)
      self.mean_count = statistics.mean(self.counts)
      if stdev:
        self.stdev_duration = statistics.stdev(self.durations)
        self.stdev_count = statistics.stdev(self.counts)

  def as_list(self):
    l = [self.name]
    for (c, d) in zip(self.counts, self.durations):
      l += [c, d]
    if self.mean_duration is not None:
      l += [self.mean_count]
      if self.stdev_count is not None:
        l += [self.stdev_count]
      l += [self.mean_duration]
      if self.stdev_duration is not None:
        l += [self.stdev_duration]
    return l

  def key(self):
    if self.mean_duration is not None:
      return self.mean_duration
    else:
      return self.durations[0]


class Bucket:

  def __init__(self, name, run_count):
    self.name = name
    self.run_count = run_count
    self.data = {}
    self.table = None
    self.total_row = None

  def __repr__(self):
    s = "Bucket: " + self.name + " {\n"
    if self.table:
      s += "\n  ".join(str(row) for row in self.table) + "\n"
    elif self.data:
      s += "\n  ".join(str(row) for row in self.data.values()) + "\n"
    if self.total_row:
      s += "  " + str(self.total_row) + "\n"
    return s + "}"

  def add_data_point(self, name, run, count, duration):
    if name not in self.data:
      self.data[name] = Row(name, self.run_count)

    self.data[name].add_data_point(run, count, duration)

  def prepare(self, stdev=False):
    if self.data:
      for row in self.data.values():
        row.prepare(stdev)

      self.table = sorted(self.data.values(), key=Row.key)
      self.total_row = Row("Total", self.run_count)
      self.total_row.add_data([
          sum(r.counts[i]
              for r in self.data.values())
          for i in range(0, self.run_count)
      ], [
          sum(r.durations[i]
              for r in self.data.values())
          for i in range(0, self.run_count)
      ])
      self.total_row.prepare(stdev)

  def as_list(self, add_bucket_titles=True, filter=None):
    t = []
    if filter is None or self.name in filter:
      if add_bucket_titles:
        t += [["\n"], [self.name]]
      t += [r.as_list() for r in self.table]
      t += [self.total_row.as_list()]
    return t

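
# collect_buckets expects the directory layout produced by run_benchmark
# above: one artifacts subdirectory per run, named after the story with ':'
# replaced by '_' and suffixed with the 1-based run index, e.g. (for a
# hypothetical story browse:news:example):
#
#   <output_dir>/artifacts/browse_news_example_1/trace/traceEvents/
#       <trace>.pb_converted.json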
def collect_buckets(story, group=True, repeats=1, output_dir="."):
  if group:
    groups = RUNTIME_CALL_STATS_GROUPS
  else:
    groups = []

  buckets = {}

  for i in range(0, repeats):
    story_dir = f"{story.replace(':', '_')}_{i + 1}"
    trace_dir = os.path.join(output_dir, "artifacts", story_dir, "trace",
                             "traceEvents")

    # run_benchmark now dumps two files: a .pb.gz file and a .pb_converted.json
    # file. We only need the latter.
    trace_file_glob = os.path.join(trace_dir, "*" + JSON_FILE_EXTENSION)
    trace_files = glob.glob(trace_file_glob)
    if not trace_files:
      print("Could not find *%s file in %s" % (JSON_FILE_EXTENSION, trace_dir))
      sys.exit(1)
    if len(trace_files) > 1:
      print("Expected one file but got: %s" % trace_files)
      sys.exit(1)

    trace_file = trace_files[0]

    output = process_trace(trace_file)
    for name in output:
      bucket_name = "Other"
      for group in groups:
        if group[1].match(name):
          bucket_name = group[0]
          break

      value = output[name]
      if bucket_name not in buckets:
        bucket = Bucket(bucket_name, repeats)
        buckets[bucket_name] = bucket
      else:
        bucket = buckets[bucket_name]

      # Durations in the trace are in microseconds; convert to milliseconds.
      bucket.add_data_point(name, i, value["count"], value["duration"] / 1000.0)
  return buckets


def create_table(buckets, record_bucket_names=True, filter=None):
  table = []
  for bucket in buckets.values():
    table += bucket.as_list(
        add_bucket_titles=record_bucket_names, filter=filter)
  return table


def main():
  args = parse_args()
  story = args.story[0]

  retain = args.retain
  if args.dir is not None:
    output_dir = args.dir
    if not os.path.isdir(output_dir):
      print("Specified output directory does not exist: %s" % output_dir)
      sys.exit(1)
  else:
    output_dir = tempfile.mkdtemp(prefix="runtime_call_stats_")
    run_benchmark(
        story,
        repeats=args.repeats,
        output_dir=output_dir,
        verbose=args.verbose,
        js_flags=args.js_flags,
        browser_args=args.browser_args,
        chromium_dir=args.chromium_dir,
        benchmark=args.benchmark,
        executable=args.executable,
        browser=args.browser,
        device=args.device)

  try:
    buckets = collect_buckets(
        story, group=args.group, repeats=args.repeats, output_dir=output_dir)

    for b in buckets.values():
      b.prepare(args.stdev)

    table = create_table(
        buckets, record_bucket_names=args.group, filter=args.filter)

    headers = [""] + ["Count", "Duration\n(ms)"] * args.repeats
    if args.repeats > 1:
      if args.stdev:
        headers += [
            "Count\nMean", "Count\nStdev", "Duration\nMean (ms)",
            "Duration\nStdev (ms)"
        ]
      else:
        headers += ["Count\nMean", "Duration\nMean (ms)"]

    if args.out_file:
      with open(args.out_file, "w", newline="") as f:
        write_output(f, table, headers, args.repeats, args.format)
    else:
      write_output(sys.stdout, table, headers, args.repeats, args.format)
  finally:
    if retain == "none":
      shutil.rmtree(output_dir)
    elif retain == "json":
      # Delete all files bottom-up except those ending in JSON_FILE_EXTENSION,
      # and attempt to delete the subdirectories (ignoring errors).
      for dir_name, subdir_list, file_list in os.walk(
          output_dir, topdown=False):
        for file_name in file_list:
          if not file_name.endswith(JSON_FILE_EXTENSION):
            os.remove(os.path.join(dir_name, file_name))
        for subdir in subdir_list:
          try:
            os.rmdir(os.path.join(dir_name, subdir))
          except OSError:
            pass


if __name__ == "__main__":
  sys.exit(main())