#!/usr/bin/env python
# Copyright 2016 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run performance tests locally or remotely."""

from __future__ import print_function

import argparse
import collections
import itertools
import json
import multiprocessing
import os
import pipes
import re
import subprocess
import sys
import tempfile
import time
import traceback
import uuid
import six

import performance.scenario_config as scenario_config
import python_utils.jobset as jobset
import python_utils.report_utils as report_utils

_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
os.chdir(_ROOT)

_REMOTE_HOST_USERNAME = 'jenkins'

_SCENARIO_TIMEOUT = 3 * 60
_WORKER_TIMEOUT = 3 * 60
_NETPERF_TIMEOUT = 60
_QUIT_WORKER_TIMEOUT = 2 * 60


class QpsWorkerJob:
    """Encapsulates a qps worker server job."""

    def __init__(self, spec, language, host_and_port, perf_file_base_name=None):
        self._spec = spec
        self.language = language
        self.host_and_port = host_and_port
        self._job = None
        self.perf_file_base_name = perf_file_base_name

    def start(self):
        self._job = jobset.Job(self._spec,
                               newline_on_success=True,
                               travis=True,
                               add_env={})

    def is_running(self):
        """Polls a job and returns True if given job is still running."""
        return self._job and self._job.state() == jobset._RUNNING

    def kill(self):
        if self._job:
            self._job.kill()
            self._job = None


def create_qpsworker_job(language,
                         shortname=None,
                         port=10000,
                         remote_host=None,
                         perf_cmd=None):
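    # Assemble the worker command line: the language-specific worker
    # invocation, optionally wrapped in a perf command, a 'timeout' guard,
    # and ssh. For a remote host with perf enabled the result is roughly
    # (illustrative shape only):
    #   ssh <user>@<host> 'cd ~/performance_workspace/grpc/ &&
    #       timeout <worker_timeout + 30> <perf cmd> -o <base>-perf.data
    #       <worker cmd> --driver_port=<port>'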
    cmdline = (language.worker_cmdline() + ['--driver_port=%s' % port])

    if remote_host:
        host_and_port = '%s:%s' % (remote_host, port)
    else:
        host_and_port = 'localhost:%s' % port

    perf_file_base_name = None
    if perf_cmd:
        perf_file_base_name = '%s-%s' % (host_and_port, shortname)
        # specify -o output file so perf.data gets collected when worker stopped
        cmdline = perf_cmd + ['-o', '%s-perf.data' % perf_file_base_name
                             ] + cmdline

    worker_timeout = _WORKER_TIMEOUT
    if remote_host:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
        ssh_cmd = ['ssh']
        cmdline = ['timeout', '%s' % (worker_timeout + 30)] + cmdline
        ssh_cmd.extend([
            str(user_at_host),
            'cd ~/performance_workspace/grpc/ && %s' % ' '.join(cmdline)
        ])
        cmdline = ssh_cmd

    jobspec = jobset.JobSpec(
        cmdline=cmdline,
        shortname=shortname,
        # workers get restarted after each scenario
        timeout_seconds=worker_timeout,
        verbose_success=True)
    return QpsWorkerJob(jobspec, language, host_and_port, perf_file_base_name)


def create_scenario_jobspec(scenario_json,
                            workers,
                            remote_host=None,
                            bq_result_table=None,
                            server_cpu_load=0):
    """Creates a jobspec for running one scenario with the QPS driver."""
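    # The scenario is driven by a single shell command of roughly this shape
    # (illustrative):
    #   QPS_WORKERS="<host:port>,..." [BQ_RESULT_TABLE="..."] \
    #       tools/run_tests/performance/run_qps_driver.sh \
    #       --scenarios_json='{"scenarios": [...]}' \
    #       --scenario_result_file=scenario_result.json [load search flags]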
    # setting QPS_WORKERS env variable here makes sure it works with SSH too.
    cmd = 'QPS_WORKERS="%s" ' % ','.join(workers)
    if bq_result_table:
        cmd += 'BQ_RESULT_TABLE="%s" ' % bq_result_table
    cmd += 'tools/run_tests/performance/run_qps_driver.sh '
    cmd += '--scenarios_json=%s ' % pipes.quote(
        json.dumps({'scenarios': [scenario_json]}))
    cmd += '--scenario_result_file=scenario_result.json '
    if server_cpu_load != 0:
        cmd += '--search_param=offered_load --initial_search_value=1000 --targeted_cpu_load=%d --stride=500 --error_tolerance=0.01' % server_cpu_load
    if remote_host:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
        cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            user_at_host, pipes.quote(cmd))

    return jobset.JobSpec(cmdline=[cmd],
                          shortname='%s' % scenario_json['name'],
                          timeout_seconds=_SCENARIO_TIMEOUT,
                          shell=True,
                          verbose_success=True)


def create_quit_jobspec(workers, remote_host=None):
    """Creates a jobspec for telling all QPS workers to quit."""
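    # 'qps_json_driver --quit' contacts every worker listed in QPS_WORKERS and
    # asks it to shut down; workers are started fresh for each scenario.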
    # setting QPS_WORKERS env variable here makes sure it works with SSH too.
    cmd = 'QPS_WORKERS="%s" cmake/build/qps_json_driver --quit' % ','.join(
        w.host_and_port for w in workers)
    if remote_host:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
        cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            user_at_host, pipes.quote(cmd))

    return jobset.JobSpec(cmdline=[cmd],
                          shortname='shutdown_workers',
                          timeout_seconds=_QUIT_WORKER_TIMEOUT,
                          shell=True,
                          verbose_success=True)


def create_netperf_jobspec(server_host='localhost',
                           client_host=None,
                           bq_result_table=None):
    """Creates a jobspec for running the netperf benchmark."""
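    # netperf runs as a standalone pseudo-scenario: it needs no QPS workers,
    # only the server/client hosts and, optionally, a BigQuery result table.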
    cmd = 'NETPERF_SERVER_HOST="%s" ' % server_host
    if bq_result_table:
        cmd += 'BQ_RESULT_TABLE="%s" ' % bq_result_table
    if client_host:
        # If netperf is running remotely, the env variables populated by Jenkins
        # won't be available on the client, but we need them for uploading results
        # to BigQuery.
        jenkins_job_name = os.getenv('JOB_NAME')
        if jenkins_job_name:
            cmd += 'JOB_NAME="%s" ' % jenkins_job_name
        jenkins_build_number = os.getenv('BUILD_NUMBER')
        if jenkins_build_number:
            cmd += 'BUILD_NUMBER="%s" ' % jenkins_build_number

    cmd += 'tools/run_tests/performance/run_netperf.sh'
    if client_host:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, client_host)
        cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            user_at_host, pipes.quote(cmd))

    return jobset.JobSpec(cmdline=[cmd],
                          shortname='netperf',
                          timeout_seconds=_NETPERF_TIMEOUT,
                          shell=True,
                          verbose_success=True)


def archive_repo(languages):
    """Archives local version of repo including submodules."""
    cmdline = ['tar', '-cf', '../grpc.tar', '../grpc/']
    if 'java' in languages:
        cmdline.append('../grpc-java')
    if 'go' in languages:
        cmdline.append('../grpc-go')
    if 'node' in languages or 'node_purejs' in languages:
        cmdline.append('../grpc-node')

    archive_job = jobset.JobSpec(cmdline=cmdline,
                                 shortname='archive_repo',
                                 timeout_seconds=3 * 60)

    jobset.message('START', 'Archiving local repository.', do_newline=True)
    num_failures, _ = jobset.run([archive_job],
                                 newline_on_success=True,
                                 maxjobs=1)
    if num_failures == 0:
        jobset.message('SUCCESS',
                       'Archive with local repository created successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED',
                       'Failed to archive local repository.',
                       do_newline=True)
        sys.exit(1)


def prepare_remote_hosts(hosts, prepare_local=False):
    """Prepares remote hosts (and maybe prepare localhost as well)."""
    prepare_timeout = 10 * 60
    prepare_jobs = []
    for host in hosts:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, host)
        prepare_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/remote_host_prepare.sh'],
                shortname='remote_host_prepare.%s' % host,
                environ={'USER_AT_HOST': user_at_host},
                timeout_seconds=prepare_timeout))
    if prepare_local:
        # Prepare localhost as well
        prepare_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/kill_workers.sh'],
                shortname='local_prepare',
                timeout_seconds=prepare_timeout))
    jobset.message('START', 'Preparing hosts.', do_newline=True)
    num_failures, _ = jobset.run(prepare_jobs,
                                 newline_on_success=True,
                                 maxjobs=10)
    if num_failures == 0:
        jobset.message('SUCCESS',
                       'Prepare step completed successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED',
                       'Failed to prepare remote hosts.',
                       do_newline=True)
        sys.exit(1)


def build_on_remote_hosts(hosts,
                          languages=list(scenario_config.LANGUAGES.keys()),
                          build_local=False):
    """Builds performance worker on remote hosts (and maybe also locally)."""
    build_timeout = 45 * 60
    # Kokoro VMs (which are local only) do not have caching, so they need more time to build
    local_build_timeout = 60 * 60
    build_jobs = []
    for host in hosts:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, host)
        build_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/remote_host_build.sh'] +
                languages,
                shortname='remote_host_build.%s' % host,
                environ={
                    'USER_AT_HOST': user_at_host,
                    'CONFIG': 'opt'
                },
                timeout_seconds=build_timeout))
    if build_local:
        # start port server locally
        build_jobs.append(
            jobset.JobSpec(
                cmdline=['python', 'tools/run_tests/start_port_server.py'],
                shortname='local_start_port_server',
                timeout_seconds=2 * 60))
        # Build locally as well
        build_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/build_performance.sh'] +
                languages,
                shortname='local_build',
                environ={'CONFIG': 'opt'},
                timeout_seconds=local_build_timeout))
    jobset.message('START', 'Building.', do_newline=True)
    num_failures, _ = jobset.run(build_jobs,
                                 newline_on_success=True,
                                 maxjobs=10)
    if num_failures == 0:
        jobset.message('SUCCESS', 'Built successfully.', do_newline=True)
    else:
        jobset.message('FAILED', 'Build failed.', do_newline=True)
        sys.exit(1)


def create_qpsworkers(languages, worker_hosts, perf_cmd=None):
    """Creates QPS workers (but does not start them)."""
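    # Depending on worker_hosts, each language gets two local workers, two
    # workers on a single remote host, or one worker per remote host. The
    # actual port is offset per language via language.worker_port_offset().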
    if not worker_hosts:
        # run two workers locally (for each language)
        workers = [(None, 10000), (None, 10010)]
    elif len(worker_hosts) == 1:
        # run two workers on the remote host (for each language)
        workers = [(worker_hosts[0], 10000), (worker_hosts[0], 10010)]
    else:
        # run one worker per each remote host (for each language)
        workers = [(worker_host, 10000) for worker_host in worker_hosts]

    return [
        create_qpsworker_job(language,
                             shortname='qps_worker_%s_%s' %
                             (language, worker_idx),
                             port=worker[1] + language.worker_port_offset(),
                             remote_host=worker[0],
                             perf_cmd=perf_cmd)
        for language in languages
        for worker_idx, worker in enumerate(workers)
    ]


def perf_report_processor_job(worker_host, perf_base_name, output_filename,
                              flame_graph_reports):
    print('Creating perf report collection job for %s' % worker_host)
    cmd = ''
    if worker_host != 'localhost':
        user_at_host = "%s@%s" % (_REMOTE_HOST_USERNAME, worker_host)
        cmd = "USER_AT_HOST=%s OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s tools/run_tests/performance/process_remote_perf_flamegraphs.sh" % (
            user_at_host, output_filename, flame_graph_reports, perf_base_name)
    else:
        cmd = "OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s tools/run_tests/performance/process_local_perf_flamegraphs.sh" % (
            output_filename, flame_graph_reports, perf_base_name)

    return jobset.JobSpec(cmdline=cmd,
                          timeout_seconds=3 * 60,
                          shell=True,
                          verbose_success=True,
                          shortname='process perf report')


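# A Scenario ties together the driver jobspec, the QpsWorkerJob instances it
# needs (empty for netperf), and the name used in reports.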
Scenario = collections.namedtuple('Scenario', 'jobspec workers name')


def create_scenarios(languages,
                     workers_by_lang,
                     remote_host=None,
                     regex='.*',
                     category='all',
                     bq_result_table=None,
                     netperf=False,
                     netperf_hosts=[],
                     server_cpu_load=0):
    """Create jobspecs for scenarios to run."""
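    # Builds an optional netperf pseudo-scenario plus one Scenario per
    # (language, scenario) pair whose name matches 'regex' and whose category
    # matches 'category'. CLIENT_LANGUAGE/SERVER_LANGUAGE annotations swap in
    # workers of a different language for cross-language scenarios.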
    all_workers = [
        worker for workers in workers_by_lang.values() for worker in workers
    ]
    scenarios = []
    _NO_WORKERS = []

    if netperf:
        if not netperf_hosts:
            netperf_server = 'localhost'
            netperf_client = None
        elif len(netperf_hosts) == 1:
            netperf_server = netperf_hosts[0]
            netperf_client = netperf_hosts[0]
        else:
            netperf_server = netperf_hosts[0]
            netperf_client = netperf_hosts[1]
        scenarios.append(
            Scenario(
                create_netperf_jobspec(server_host=netperf_server,
                                       client_host=netperf_client,
                                       bq_result_table=bq_result_table),
                _NO_WORKERS, 'netperf'))

    for language in languages:
        for scenario_json in language.scenarios():
            if re.search(regex, scenario_json['name']):
                categories = scenario_json.get('CATEGORIES',
                                               ['scalable', 'smoketest'])
                if category in categories or category == 'all':
                    workers = workers_by_lang[str(language)][:]
                    # 'SERVER_LANGUAGE' is an indicator for this script to pick
                    # a server in a different language.
                    custom_server_lang = scenario_json.get(
                        'SERVER_LANGUAGE', None)
                    custom_client_lang = scenario_json.get(
                        'CLIENT_LANGUAGE', None)
                    scenario_json = scenario_config.remove_nonproto_fields(
                        scenario_json)
                    if custom_server_lang and custom_client_lang:
                        raise Exception(
                            'Cannot set both custom CLIENT_LANGUAGE and '
                            'SERVER_LANGUAGE in the same scenario')
                    if custom_server_lang:
                        if not workers_by_lang.get(custom_server_lang, []):
                            print('Warning: Skipping scenario %s as' %
                                  scenario_json['name'])
                            print(
                                'SERVER_LANGUAGE is set to %s yet the language has '
                                'not been selected with -l' %
                                custom_server_lang)
                            continue
                        for idx in range(0, scenario_json['num_servers']):
                            # replace first X workers by workers of a different language
                            workers[idx] = workers_by_lang[custom_server_lang][
                                idx]
                    if custom_client_lang:
                        if not workers_by_lang.get(custom_client_lang, []):
                            print('Warning: Skipping scenario %s as' %
                                  scenario_json['name'])
                            print(
                                'CLIENT_LANGUAGE is set to %s yet the language has '
                                'not been selected with -l' %
                                custom_client_lang)
                            continue
                        for idx in range(scenario_json['num_servers'],
                                         len(workers)):
                            # replace all client workers by workers of a different language,
                            # leave num_server workers as they are server workers.
                            workers[idx] = workers_by_lang[custom_client_lang][
                                idx]
                    scenario = Scenario(
                        create_scenario_jobspec(
                            scenario_json, [w.host_and_port for w in workers],
                            remote_host=remote_host,
                            bq_result_table=bq_result_table,
                            server_cpu_load=server_cpu_load), workers,
                        scenario_json['name'])
                    scenarios.append(scenario)

    return scenarios


def finish_qps_workers(jobs, qpsworker_jobs):
    """Waits for given jobs to finish and eventually kills them."""
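    # Polls every 3 seconds; after roughly 10 polls any worker that is still
    # running is killed and counted in the returned num_killed.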
    retries = 0
    num_killed = 0
    while any(job.is_running() for job in jobs):
        for job in qpsworker_jobs:
            if job.is_running():
                print('QPS worker "%s" is still running.' % job.host_and_port)
        if retries > 10:
            print('Killing all QPS workers.')
            for job in jobs:
                job.kill()
                num_killed += 1
        retries += 1
        time.sleep(3)
    print('All QPS workers finished.')
    return num_killed


profile_output_files = []


# Collects perf text reports and flamegraphs if perf_cmd was used.
# Note that the base names of the perf text reports are used when creating and
# processing perf data. The scenario name makes the output name unique in the
# final perf reports directory.
# Also, the perf profiles need to be fetched and processed after each scenario
# in order to avoid clobbering the output files.
def run_collect_perf_profile_jobs(hosts_and_base_names, scenario_name,
                                  flame_graph_reports):
    perf_report_jobs = []
    global profile_output_files
    for host_and_port in hosts_and_base_names:
        perf_base_name = hosts_and_base_names[host_and_port]
        output_filename = '%s-%s' % (scenario_name, perf_base_name)
        # from the base filename, create .svg output filename
        host = host_and_port.split(':')[0]
        profile_output_files.append('%s.svg' % output_filename)
        perf_report_jobs.append(
            perf_report_processor_job(host, perf_base_name, output_filename,
                                      flame_graph_reports))

    jobset.message('START',
                   'Collecting perf reports from qps workers',
                   do_newline=True)
    failures, _ = jobset.run(perf_report_jobs,
                             newline_on_success=True,
                             maxjobs=1)
    jobset.message('SUCCESS',
                   'Collecting perf reports from qps workers',
                   do_newline=True)
    return failures


def main():
    argp = argparse.ArgumentParser(description='Run performance tests.')
    argp.add_argument('-l',
                      '--language',
                      choices=['all'] +
                      sorted(scenario_config.LANGUAGES.keys()),
                      nargs='+',
                      required=True,
                      help='Languages to benchmark.')
    argp.add_argument(
        '--remote_driver_host',
        default=None,
        help=
        'Run QPS driver on given host. By default, QPS driver is run locally.')
    argp.add_argument('--remote_worker_host',
                      nargs='+',
                      default=[],
                      help='Worker hosts where to start QPS workers.')
    argp.add_argument(
        '--dry_run',
        default=False,
        action='store_const',
        const=True,
        help='Just list scenarios to be run, but don\'t run them.')
    argp.add_argument('-r',
                      '--regex',
                      default='.*',
                      type=str,
                      help='Regex to select scenarios to run.')
    argp.add_argument('--bq_result_table',
                      default=None,
                      type=str,
                      help='Bigquery "dataset.table" to upload results to.')
    argp.add_argument('--category',
                      choices=['smoketest', 'all', 'scalable', 'sweep'],
                      default='all',
                      help='Select a category of tests to run.')
    argp.add_argument('--netperf',
                      default=False,
                      action='store_const',
                      const=True,
                      help='Run netperf benchmark as one of the scenarios.')
    argp.add_argument(
        '--server_cpu_load',
        default=0,
        type=int,
        help='Select a targeted server cpu load to run. 0 means ignore this flag'
    )
    argp.add_argument('-x',
                      '--xml_report',
                      default='report.xml',
                      type=str,
                      help='Name of XML report file to generate.')
    argp.add_argument(
        '--perf_args',
        help=('Example usage: "--perf_args=record -F 99 -g". '
              'Wrap QPS workers in a perf command '
              'with the arguments to perf specified here. '
              '".svg" flame graph profiles will be '
              'created for each QPS worker on each scenario. '
              'Files will be written to the "<repo_root>/<args.flame_graph_reports>" '
              'directory. Output files from running the worker '
              'under perf are saved in the repo root where it is run. '
              'Note that the perf "-g" flag is necessary for '
              'flame graph generation to work (assuming the binary '
              'being profiled uses frame pointers; check out '
              'the "--call-graph dwarf" option using libunwind otherwise). '
              'Also note that the entire "--perf_args=<arg(s)>" must '
              'be wrapped in quotes as in the example usage. '
              'If "--perf_args" is unspecified, "perf" will '
              'not be used at all. '
              'See http://www.brendangregg.com/perf.html '
              'for more general perf examples.'))
    argp.add_argument(
        '--skip_generate_flamegraphs',
        default=False,
        action='store_const',
        const=True,
        help=('Turn flame graph generation off. '
              'May be useful if "perf_args" arguments do not make sense for '
              'generating flamegraphs (e.g., "--perf_args=stat ...")'))
    argp.add_argument(
        '-f',
        '--flame_graph_reports',
        default='perf_reports',
        type=str,
        help=
        'Name of directory to output flame graph profiles to, if any are created.'
    )
    argp.add_argument(
        '-u',
        '--remote_host_username',
        default='',
        type=str,
        help='Use a username that isn\'t "jenkins" to SSH into remote workers.')

    args = argp.parse_args()

    global _REMOTE_HOST_USERNAME
    if args.remote_host_username:
        _REMOTE_HOST_USERNAME = args.remote_host_username

    languages = set(
        scenario_config.LANGUAGES[l] for l in itertools.chain.from_iterable(
            six.iterkeys(scenario_config.LANGUAGES) if x == 'all' else [x]
            for x in args.language))
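    # '-l all' expands to every language defined in scenario_config.LANGUAGES;
    # an explicit list selects just those languages.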

    # Put together set of remote hosts where to run and build
    remote_hosts = set()
    if args.remote_worker_host:
        for host in args.remote_worker_host:
            remote_hosts.add(host)
    if args.remote_driver_host:
        remote_hosts.add(args.remote_driver_host)

    if not args.dry_run:
        if remote_hosts:
            archive_repo(languages=[str(l) for l in languages])
            prepare_remote_hosts(remote_hosts, prepare_local=True)
        else:
            prepare_remote_hosts([], prepare_local=True)

    build_local = False
    if not args.remote_driver_host:
        build_local = True
    if not args.dry_run:
        build_on_remote_hosts(remote_hosts,
                              languages=[str(l) for l in languages],
                              build_local=build_local)

    perf_cmd = None
    if args.perf_args:
        print('Running workers under perf profiler')
        # Expect /usr/bin/perf to be installed here, as is usual
        perf_cmd = ['/usr/bin/perf']
        perf_cmd.extend(re.split(r'\s+', args.perf_args))
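        # e.g. --perf_args="record -F 99 -g" yields
        # perf_cmd == ['/usr/bin/perf', 'record', '-F', '99', '-g']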

    qpsworker_jobs = create_qpsworkers(languages,
                                       args.remote_worker_host,
                                       perf_cmd=perf_cmd)

    # get list of worker addresses for each language.
    workers_by_lang = dict([(str(language), []) for language in languages])
    for job in qpsworker_jobs:
        workers_by_lang[str(job.language)].append(job)
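    # workers_by_lang now maps a language name to its QpsWorkerJob instances,
    # e.g. {'<language>': [QpsWorkerJob, QpsWorkerJob], ...}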

    scenarios = create_scenarios(languages,
                                 workers_by_lang=workers_by_lang,
                                 remote_host=args.remote_driver_host,
                                 regex=args.regex,
                                 category=args.category,
                                 bq_result_table=args.bq_result_table,
                                 netperf=args.netperf,
                                 netperf_hosts=args.remote_worker_host,
                                 server_cpu_load=args.server_cpu_load)

    if not scenarios:
        raise Exception('No scenarios to run')

    total_scenario_failures = 0
    qps_workers_killed = 0
    merged_resultset = {}
    perf_report_failures = 0

    for scenario in scenarios:
        if args.dry_run:
            print(scenario.name)
        else:
            scenario_failures = 0
            try:
                for worker in scenario.workers:
                    worker.start()
                jobs = [scenario.jobspec]
                if scenario.workers:
                    # TODO(jtattermusch): ideally the "quit" job won't show up
                    # in the report
                    jobs.append(
                        create_quit_jobspec(
                            scenario.workers,
                            remote_host=args.remote_driver_host))
                scenario_failures, resultset = jobset.run(
                    jobs, newline_on_success=True, maxjobs=1)
                total_scenario_failures += scenario_failures
                merged_resultset = dict(
                    itertools.chain(six.iteritems(merged_resultset),
                                    six.iteritems(resultset)))
            finally:
                # Consider qps workers that need to be killed as failures
                qps_workers_killed += finish_qps_workers(
                    scenario.workers, qpsworker_jobs)

            if perf_cmd and scenario_failures == 0 and not args.skip_generate_flamegraphs:
                workers_and_base_names = {}
                for worker in scenario.workers:
                    if not worker.perf_file_base_name:
                        raise Exception(
                            'using perf but perf report filename is unspecified'
                        )
                    workers_and_base_names[
                        worker.host_and_port] = worker.perf_file_base_name
                perf_report_failures += run_collect_perf_profile_jobs(
                    workers_and_base_names, scenario.name,
                    args.flame_graph_reports)

    # Still write the index.html even if some scenarios failed.
    # 'profile_output_files' will only have names for scenarios that passed
    if perf_cmd and not args.skip_generate_flamegraphs:
        # write the index file to the output dir, with all profiles from all scenarios/workers
        report_utils.render_perf_profiling_results(
            '%s/index.html' % args.flame_graph_reports, profile_output_files)

    report_utils.render_junit_xml_report(merged_resultset,
                                         args.xml_report,
                                         suite_name='benchmarks',
                                         multi_target=True)

    if total_scenario_failures > 0 or qps_workers_killed > 0:
        print('%s scenarios failed and %s qps worker jobs killed' %
              (total_scenario_failures, qps_workers_killed))
        sys.exit(1)

    if perf_report_failures > 0:
        print('%s perf profile collection jobs failed' % perf_report_failures)
        sys.exit(1)


if __name__ == "__main__":
    main()