#!/usr/bin/env python
# Copyright 2016 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run performance tests locally or remotely."""

from __future__ import print_function

import argparse
import collections
import itertools
import json
import multiprocessing
import os
import pipes
import re
import subprocess
import sys
import tempfile
import time
import traceback
import uuid
import six

import performance.scenario_config as scenario_config
import python_utils.jobset as jobset
import python_utils.report_utils as report_utils

_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
os.chdir(_ROOT)

_REMOTE_HOST_USERNAME = 'jenkins'

_SCENARIO_TIMEOUT = 3 * 60
_WORKER_TIMEOUT = 3 * 60
_NETPERF_TIMEOUT = 60
_QUIT_WORKER_TIMEOUT = 2 * 60


class QpsWorkerJob:
    """Encapsulates a qps worker server job."""

    def __init__(self, spec, language, host_and_port, perf_file_base_name=None):
        self._spec = spec
        self.language = language
        self.host_and_port = host_and_port
        self._job = None
        self.perf_file_base_name = perf_file_base_name

    def start(self):
        self._job = jobset.Job(self._spec,
                               newline_on_success=True,
                               travis=True,
                               add_env={})

    def is_running(self):
        """Polls a job and returns True if given job is still running."""
        return self._job and self._job.state() == jobset._RUNNING

    def kill(self):
        if self._job:
            self._job.kill()
            self._job = None


def create_qpsworker_job(language,
                         shortname=None,
                         port=10000,
                         remote_host=None,
                         perf_cmd=None):
    cmdline = (language.worker_cmdline() + ['--driver_port=%s' % port])

    if remote_host:
        host_and_port = '%s:%s' % (remote_host, port)
    else:
        host_and_port = 'localhost:%s' % port

    perf_file_base_name = None
    if perf_cmd:
        perf_file_base_name = '%s-%s' % (host_and_port, shortname)
        # specify -o output file so perf.data gets collected when worker stopped
        cmdline = perf_cmd + ['-o', '%s-perf.data' % perf_file_base_name
                             ] + cmdline

    worker_timeout = _WORKER_TIMEOUT
    if remote_host:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
        ssh_cmd = ['ssh']
        cmdline = ['timeout', '%s' % (worker_timeout + 30)] + cmdline
        ssh_cmd.extend([
            str(user_at_host),
            'cd ~/performance_workspace/grpc/ && %s' % ' '.join(cmdline)
        ])
        cmdline = ssh_cmd

    jobspec = jobset.JobSpec(
        cmdline=cmdline,
        shortname=shortname,
        timeout_seconds=worker_timeout,  # workers get restarted after each scenario
        verbose_success=True)
    return QpsWorkerJob(jobspec, language, host_and_port, perf_file_base_name)


def create_scenario_jobspec(scenario_json,
                            workers,
                            remote_host=None,
                            bq_result_table=None,
                            server_cpu_load=0):
    """Runs one scenario using QPS driver."""
    # setting QPS_WORKERS env variable here makes sure it works with SSH too.
    cmd = 'QPS_WORKERS="%s" ' % ','.join(workers)
    if bq_result_table:
        cmd += 'BQ_RESULT_TABLE="%s" ' % bq_result_table
    cmd += 'tools/run_tests/performance/run_qps_driver.sh '
    cmd += '--scenarios_json=%s ' % pipes.quote(
        json.dumps({'scenarios': [scenario_json]}))
    cmd += '--scenario_result_file=scenario_result.json '
    if server_cpu_load != 0:
        cmd += '--search_param=offered_load --initial_search_value=1000 --targeted_cpu_load=%d --stride=500 --error_tolerance=0.01' % server_cpu_load
    if remote_host:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
        cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            user_at_host, pipes.quote(cmd))

    return jobset.JobSpec(cmdline=[cmd],
                          shortname='%s' % scenario_json['name'],
                          timeout_seconds=_SCENARIO_TIMEOUT,
                          shell=True,
                          verbose_success=True)


def create_quit_jobspec(workers, remote_host=None):
    """Runs quit using QPS driver."""
    # setting QPS_WORKERS env variable here makes sure it works with SSH too.
    cmd = 'QPS_WORKERS="%s" cmake/build/qps_json_driver --quit' % ','.join(
        w.host_and_port for w in workers)
    if remote_host:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
        cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            user_at_host, pipes.quote(cmd))

    return jobset.JobSpec(cmdline=[cmd],
                          shortname='shutdown_workers',
                          timeout_seconds=_QUIT_WORKER_TIMEOUT,
                          shell=True,
                          verbose_success=True)


def create_netperf_jobspec(server_host='localhost',
                           client_host=None,
                           bq_result_table=None):
    """Runs netperf benchmark."""
    cmd = 'NETPERF_SERVER_HOST="%s" ' % server_host
    if bq_result_table:
        cmd += 'BQ_RESULT_TABLE="%s" ' % bq_result_table
    if client_host:
        # If netperf is running remotely, the env variables populated by Jenkins
        # won't be available on the client, but we need them for uploading results
        # to BigQuery.
        jenkins_job_name = os.getenv('JOB_NAME')
        if jenkins_job_name:
            cmd += 'JOB_NAME="%s" ' % jenkins_job_name
        jenkins_build_number = os.getenv('BUILD_NUMBER')
        if jenkins_build_number:
            cmd += 'BUILD_NUMBER="%s" ' % jenkins_build_number

    cmd += 'tools/run_tests/performance/run_netperf.sh'
    if client_host:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, client_host)
        cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            user_at_host, pipes.quote(cmd))

    return jobset.JobSpec(cmdline=[cmd],
                          shortname='netperf',
                          timeout_seconds=_NETPERF_TIMEOUT,
                          shell=True,
                          verbose_success=True)


def archive_repo(languages):
    """Archives local version of repo including submodules."""
    cmdline = ['tar', '-cf', '../grpc.tar', '../grpc/']
    if 'java' in languages:
        cmdline.append('../grpc-java')
    if 'go' in languages:
        cmdline.append('../grpc-go')
    if 'node' in languages or 'node_purejs' in languages:
        cmdline.append('../grpc-node')

    archive_job = jobset.JobSpec(cmdline=cmdline,
                                 shortname='archive_repo',
                                 timeout_seconds=3 * 60)

    jobset.message('START', 'Archiving local repository.', do_newline=True)
    num_failures, _ = jobset.run([archive_job],
                                 newline_on_success=True,
                                 maxjobs=1)
    if num_failures == 0:
        jobset.message('SUCCESS',
                       'Archive with local repository created successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED',
                       'Failed to archive local repository.',
                       do_newline=True)
        sys.exit(1)


def prepare_remote_hosts(hosts, prepare_local=False):
    """Prepares remote hosts (and maybe prepare localhost as well)."""
    prepare_timeout = 10 * 60
    prepare_jobs = []
    for host in hosts:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, host)
        prepare_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/remote_host_prepare.sh'],
                shortname='remote_host_prepare.%s' % host,
                environ={'USER_AT_HOST': user_at_host},
                timeout_seconds=prepare_timeout))
    if prepare_local:
        # Prepare localhost as well
        prepare_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/kill_workers.sh'],
                shortname='local_prepare',
                timeout_seconds=prepare_timeout))
    jobset.message('START', 'Preparing hosts.', do_newline=True)
    num_failures, _ = jobset.run(prepare_jobs,
                                 newline_on_success=True,
                                 maxjobs=10)
    if num_failures == 0:
        jobset.message('SUCCESS',
                       'Prepare step completed successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED',
                       'Failed to prepare remote hosts.',
                       do_newline=True)
        sys.exit(1)


def build_on_remote_hosts(hosts,
                          languages=scenario_config.LANGUAGES.keys(),
                          build_local=False):
    """Builds performance worker on remote hosts (and maybe also locally)."""
    build_timeout = 45 * 60
    # Kokoro VMs (which are local only) do not have caching, so they need more time to build
    local_build_timeout = 60 * 60
    build_jobs = []
    for host in hosts:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, host)
        build_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/remote_host_build.sh'] +
                languages,
                shortname='remote_host_build.%s' % host,
                environ={
                    'USER_AT_HOST': user_at_host,
                    'CONFIG': 'opt'
                },
                timeout_seconds=build_timeout))
    if build_local:
        # start port server locally
        build_jobs.append(
            jobset.JobSpec(
                cmdline=['python', 'tools/run_tests/start_port_server.py'],
                shortname='local_start_port_server',
                timeout_seconds=2 * 60))
        # Build locally as well
        build_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/build_performance.sh'] +
                languages,
                shortname='local_build',
                environ={'CONFIG': 'opt'},
                timeout_seconds=local_build_timeout))
    jobset.message('START', 'Building.', do_newline=True)
    num_failures, _ = jobset.run(build_jobs,
                                 newline_on_success=True,
                                 maxjobs=10)
    if num_failures == 0:
        jobset.message('SUCCESS', 'Built successfully.', do_newline=True)
    else:
        jobset.message('FAILED', 'Build failed.', do_newline=True)
        sys.exit(1)


def create_qpsworkers(languages, worker_hosts, perf_cmd=None):
    """Creates QPS workers (but does not start them)."""
    if not worker_hosts:
        # run two workers locally (for each language)
        workers = [(None, 10000), (None, 10010)]
    elif len(worker_hosts) == 1:
        # run two workers on the remote host (for each language)
        workers = [(worker_hosts[0], 10000), (worker_hosts[0], 10010)]
    else:
        # run one worker per remote host (for each language)
        workers = [(worker_host, 10000) for worker_host in worker_hosts]

    return [
        create_qpsworker_job(language,
                             shortname='qps_worker_%s_%s' %
                             (language, worker_idx),
                             port=worker[1] + language.worker_port_offset(),
                             remote_host=worker[0],
                             perf_cmd=perf_cmd)
        for language in languages
        for worker_idx, worker in enumerate(workers)
    ]


def perf_report_processor_job(worker_host, perf_base_name, output_filename,
                              flame_graph_reports):
    print('Creating perf report collection job for %s' % worker_host)
    cmd = ''
    if worker_host != 'localhost':
        user_at_host = "%s@%s" % (_REMOTE_HOST_USERNAME, worker_host)
        cmd = "USER_AT_HOST=%s OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s tools/run_tests/performance/process_remote_perf_flamegraphs.sh" % (
            user_at_host, output_filename, flame_graph_reports, perf_base_name)
    else:
        cmd = "OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s tools/run_tests/performance/process_local_perf_flamegraphs.sh" % (
            output_filename, flame_graph_reports, perf_base_name)

    return jobset.JobSpec(cmdline=cmd,
                          timeout_seconds=3 * 60,
                          shell=True,
                          verbose_success=True,
                          shortname='process perf report')


Scenario = collections.namedtuple('Scenario', 'jobspec workers name')


def create_scenarios(languages,
                     workers_by_lang,
                     remote_host=None,
                     regex='.*',
                     category='all',
                     bq_result_table=None,
                     netperf=False,
                     netperf_hosts=[],
                     server_cpu_load=0):
    """Create jobspecs for scenarios to run."""
    all_workers = [
        worker for workers in workers_by_lang.values() for worker in workers
    ]
    scenarios = []
    _NO_WORKERS = []

    if netperf:
        if not netperf_hosts:
            netperf_server = 'localhost'
            netperf_client = None
        elif len(netperf_hosts) == 1:
            netperf_server = netperf_hosts[0]
            netperf_client = netperf_hosts[0]
        else:
            netperf_server = netperf_hosts[0]
            netperf_client = netperf_hosts[1]
        scenarios.append(
            Scenario(
                create_netperf_jobspec(server_host=netperf_server,
                                       client_host=netperf_client,
                                       bq_result_table=bq_result_table),
                _NO_WORKERS, 'netperf'))

    for language in languages:
        for scenario_json in language.scenarios():
            if re.search(regex, scenario_json['name']):
                categories = scenario_json.get('CATEGORIES',
                                               ['scalable', 'smoketest'])
                if category in categories or category == 'all':
                    workers = workers_by_lang[str(language)][:]
                    # 'SERVER_LANGUAGE' is an indicator for this script to pick
                    # a server in a different language.
                    custom_server_lang = scenario_json.get(
                        'SERVER_LANGUAGE', None)
                    custom_client_lang = scenario_json.get(
                        'CLIENT_LANGUAGE', None)
                    scenario_json = scenario_config.remove_nonproto_fields(
                        scenario_json)
                    if custom_server_lang and custom_client_lang:
                        raise Exception(
                            'Cannot set both custom CLIENT_LANGUAGE and SERVER_LANGUAGE '
                            'in the same scenario')
                    if custom_server_lang:
                        if not workers_by_lang.get(custom_server_lang, []):
                            print('Warning: Skipping scenario %s as' %
                                  scenario_json['name'])
                            print(
                                'SERVER_LANGUAGE is set to %s yet the language has '
                                'not been selected with -l' %
                                custom_server_lang)
                            continue
                        for idx in range(0, scenario_json['num_servers']):
                            # replace first X workers by workers of a different language
                            workers[idx] = workers_by_lang[custom_server_lang][
                                idx]
                    if custom_client_lang:
                        if not workers_by_lang.get(custom_client_lang, []):
                            print('Warning: Skipping scenario %s as' %
                                  scenario_json['name'])
                            print(
                                'CLIENT_LANGUAGE is set to %s yet the language has '
                                'not been selected with -l' %
                                custom_client_lang)
                            continue
                        for idx in range(scenario_json['num_servers'],
                                         len(workers)):
                            # replace all client workers by workers of a different language,
                            # leave num_server workers as they are server workers.
                            workers[idx] = workers_by_lang[custom_client_lang][
                                idx]
                    scenario = Scenario(
                        create_scenario_jobspec(
                            scenario_json, [w.host_and_port for w in workers],
                            remote_host=remote_host,
                            bq_result_table=bq_result_table,
                            server_cpu_load=server_cpu_load), workers,
                        scenario_json['name'])
                    scenarios.append(scenario)

    return scenarios


def finish_qps_workers(jobs, qpsworker_jobs):
    """Waits for given jobs to finish and eventually kills them."""
    retries = 0
    num_killed = 0
    while any(job.is_running() for job in jobs):
        for job in qpsworker_jobs:
            if job.is_running():
                print('QPS worker "%s" is still running.' % job.host_and_port)
        if retries > 10:
            print('Killing all QPS workers.')
            for job in jobs:
                job.kill()
                num_killed += 1
        retries += 1
        time.sleep(3)
    print('All QPS workers finished.')
    return num_killed


profile_output_files = []


# Collect perf text reports and flamegraphs if perf_cmd was used.
# Note the base names of perf text reports are used when creating and processing
# perf data. The scenario name uniquifies the output name in the final
# perf reports directory.
# Also, the perf profiles need to be fetched and processed after each scenario
# in order to avoid clobbering the output files.
def run_collect_perf_profile_jobs(hosts_and_base_names, scenario_name,
                                  flame_graph_reports):
    perf_report_jobs = []
    global profile_output_files
    for host_and_port in hosts_and_base_names:
        perf_base_name = hosts_and_base_names[host_and_port]
        output_filename = '%s-%s' % (scenario_name, perf_base_name)
        # from the base filename, create .svg output filename
        host = host_and_port.split(':')[0]
        profile_output_files.append('%s.svg' % output_filename)
        perf_report_jobs.append(
            perf_report_processor_job(host, perf_base_name, output_filename,
                                      flame_graph_reports))

    jobset.message('START',
                   'Collecting perf reports from qps workers',
                   do_newline=True)
    failures, _ = jobset.run(perf_report_jobs,
                             newline_on_success=True,
                             maxjobs=1)
    jobset.message('SUCCESS',
                   'Collecting perf reports from qps workers',
                   do_newline=True)
    return failures


def main():
    argp = argparse.ArgumentParser(description='Run performance tests.')
    argp.add_argument('-l',
                      '--language',
                      choices=['all'] +
                      sorted(scenario_config.LANGUAGES.keys()),
                      nargs='+',
                      required=True,
                      help='Languages to benchmark.')
    argp.add_argument(
        '--remote_driver_host',
        default=None,
        help=
        'Run QPS driver on given host. By default, QPS driver is run locally.')
    argp.add_argument('--remote_worker_host',
                      nargs='+',
                      default=[],
                      help='Worker hosts where to start QPS workers.')
    argp.add_argument(
        '--dry_run',
        default=False,
        action='store_const',
        const=True,
        help='Just list scenarios to be run, but don\'t run them.')
    argp.add_argument('-r',
                      '--regex',
                      default='.*',
                      type=str,
                      help='Regex to select scenarios to run.')
    argp.add_argument('--bq_result_table',
                      default=None,
                      type=str,
                      help='Bigquery "dataset.table" to upload results to.')
    argp.add_argument('--category',
                      choices=['smoketest', 'all', 'scalable', 'sweep'],
                      default='all',
                      help='Select a category of tests to run.')
    argp.add_argument('--netperf',
                      default=False,
                      action='store_const',
                      const=True,
                      help='Run netperf benchmark as one of the scenarios.')
    argp.add_argument(
        '--server_cpu_load',
        default=0,
        type=int,
        help='Select a targeted server cpu load to run. 0 means ignore this flag'
    )
    argp.add_argument('-x',
                      '--xml_report',
                      default='report.xml',
                      type=str,
                      help='Name of XML report file to generate.')
    argp.add_argument(
        '--perf_args',
        help=('Example usage: "--perf_args=record -F 99 -g". '
              'Wrap QPS workers in a perf command '
              'with the arguments to perf specified here. '
              '".svg" flame graph profiles will be '
              'created for each QPS worker on each scenario. '
              'Files will be output to the "<repo_root>/<args.flame_graph_reports>" '
              'directory. Output files from running the worker '
              'under perf are saved in the repo root where it is run. '
              'Note that the perf "-g" flag is necessary for '
              'flame graph generation to work (assuming the binary '
              'being profiled uses frame pointers; check out the '
              '"--call-graph dwarf" option using libunwind otherwise.) '
              'Also note that the entire "--perf_args=<arg(s)>" must '
              'be wrapped in quotes as in the example usage. '
              'If "--perf_args" is unspecified, "perf" will '
              'not be used at all. '
              'See http://www.brendangregg.com/perf.html '
              'for more general perf examples.'))
    argp.add_argument(
        '--skip_generate_flamegraphs',
        default=False,
        action='store_const',
        const=True,
        help=('Turn flame graph generation off. '
              'May be useful if "perf_args" arguments do not make sense for '
              'generating flamegraphs (e.g., "--perf_args=stat ...")'))
    argp.add_argument(
        '-f',
        '--flame_graph_reports',
        default='perf_reports',
        type=str,
        help=
        'Name of directory to output flame graph profiles to, if any are created.'
    )
    argp.add_argument(
        '-u',
        '--remote_host_username',
        default='',
        type=str,
        help='Use a username that isn\'t "Jenkins" to SSH into remote workers.')

    args = argp.parse_args()

    global _REMOTE_HOST_USERNAME
    if args.remote_host_username:
        _REMOTE_HOST_USERNAME = args.remote_host_username

    languages = set(
        scenario_config.LANGUAGES[l] for l in itertools.chain.from_iterable(
            six.iterkeys(scenario_config.LANGUAGES) if x == 'all' else [x]
            for x in args.language))

    # Put together the set of remote hosts where to build and run.
    remote_hosts = set()
    if args.remote_worker_host:
        for host in args.remote_worker_host:
            remote_hosts.add(host)
    if args.remote_driver_host:
        remote_hosts.add(args.remote_driver_host)

    if not args.dry_run:
        if remote_hosts:
            archive_repo(languages=[str(l) for l in languages])
            prepare_remote_hosts(remote_hosts, prepare_local=True)
        else:
            prepare_remote_hosts([], prepare_local=True)

    build_local = False
    if not args.remote_driver_host:
        build_local = True
    if not args.dry_run:
        build_on_remote_hosts(remote_hosts,
                              languages=[str(l) for l in languages],
                              build_local=build_local)

    perf_cmd = None
    if args.perf_args:
        print('Running workers under perf profiler')
        # Expect /usr/bin/perf to be installed here, as is usual
        perf_cmd = ['/usr/bin/perf']
        perf_cmd.extend(re.split(r'\s+', args.perf_args))

    qpsworker_jobs = create_qpsworkers(languages,
                                       args.remote_worker_host,
                                       perf_cmd=perf_cmd)

    # get list of worker addresses for each language.
    workers_by_lang = dict([(str(language), []) for language in languages])
    for job in qpsworker_jobs:
        workers_by_lang[str(job.language)].append(job)

    scenarios = create_scenarios(languages,
                                 workers_by_lang=workers_by_lang,
                                 remote_host=args.remote_driver_host,
                                 regex=args.regex,
                                 category=args.category,
                                 bq_result_table=args.bq_result_table,
                                 netperf=args.netperf,
                                 netperf_hosts=args.remote_worker_host,
                                 server_cpu_load=args.server_cpu_load)

    if not scenarios:
        raise Exception('No scenarios to run')

    total_scenario_failures = 0
    qps_workers_killed = 0
    merged_resultset = {}
    perf_report_failures = 0

    for scenario in scenarios:
        if args.dry_run:
            print(scenario.name)
        else:
            scenario_failures = 0
            try:
                for worker in scenario.workers:
                    worker.start()
                jobs = [scenario.jobspec]
                if scenario.workers:
                    # TODO(jtattermusch): ideally the "quit" job won't show up
                    # in the report
                    jobs.append(
                        create_quit_jobspec(
                            scenario.workers,
                            remote_host=args.remote_driver_host))
                scenario_failures, resultset = jobset.run(
                    jobs, newline_on_success=True, maxjobs=1)
                total_scenario_failures += scenario_failures
                merged_resultset = dict(
                    itertools.chain(six.iteritems(merged_resultset),
                                    six.iteritems(resultset)))
            finally:
                # Consider qps workers that need to be killed as failures
                qps_workers_killed += finish_qps_workers(
                    scenario.workers, qpsworker_jobs)

            if perf_cmd and scenario_failures == 0 and not args.skip_generate_flamegraphs:
                workers_and_base_names = {}
                for worker in scenario.workers:
                    if not worker.perf_file_base_name:
                        raise Exception(
                            'using perf but perf report filename is unspecified'
                        )
                    workers_and_base_names[
                        worker.host_and_port] = worker.perf_file_base_name
                perf_report_failures += run_collect_perf_profile_jobs(
                    workers_and_base_names, scenario.name,
                    args.flame_graph_reports)

    # Still write the index.html even if some scenarios failed.
    # 'profile_output_files' will only have names for scenarios that passed.
    if perf_cmd and not args.skip_generate_flamegraphs:
        # write the index file to the output dir, with all profiles from all scenarios/workers
        report_utils.render_perf_profiling_results(
            '%s/index.html' % args.flame_graph_reports, profile_output_files)

    report_utils.render_junit_xml_report(merged_resultset,
                                         args.xml_report,
                                         suite_name='benchmarks',
                                         multi_target=True)

    if total_scenario_failures > 0 or qps_workers_killed > 0:
        print('%s scenarios failed and %s qps worker jobs killed' %
              (total_scenario_failures, qps_workers_killed))
        sys.exit(1)

    if perf_report_failures > 0:
        print('%s perf profile collection jobs failed' % perf_report_failures)
        sys.exit(1)


if __name__ == "__main__":
    main()
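
# The sketch below shows how this script is typically invoked; all flags are
# defined in main() above, while the host names, language choices, and the
# BigQuery table are illustrative placeholders, not defaults of this script.
#
#   # Run all C++ scenarios locally:
#   tools/run_tests/run_performance_tests.py -l c++
#
#   # Run smoketest scenarios against two remote worker hosts, profiling each
#   # worker with perf and uploading results to BigQuery:
#   tools/run_tests/run_performance_tests.py -l python \
#       --remote_worker_host worker-host-1 worker-host-2 \
#       --category smoketest \
#       --perf_args "record -F 99 -g" \
#       --bq_result_table my_dataset.my_table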