#!/usr/bin/env python
# Copyright 2016 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Uploads performance benchmark result file to bigquery.

from __future__ import print_function

import argparse
import calendar
import json
import os
import sys
import time
import uuid
import massage_qps_stats

gcp_utils_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../../gcp/utils'))
sys.path.append(gcp_utils_dir)
import big_query_utils

_PROJECT_ID = 'grpc-testing'


def _upload_netperf_latency_csv_to_bigquery(dataset_id, table_id, result_file):
    """Uploads a netperf latency CSV file as a single result row.

    The file must contain exactly three comma-separated numbers, which are
    stored as the 50th/90th/99th percentile latencies of a synthetic
    'netperf_tcp_rr' scenario. The process exits with status 1 if the insert
    is reported as failed.

    Args:
      dataset_id: Bigquery dataset to upload to.
      table_id: Bigquery table to upload to (created if needed).
      result_file: Path of the CSV file to read.

    Raises:
      ValueError: if the file does not hold exactly three numeric columns.
    """
    with open(result_file, 'r') as f:
        (col1, col2, col3) = f.read().split(',')
    # Each value is scaled by 1000 before upload.
    # NOTE(review): presumably a unit conversion to match the latency unit
    # used by regular scenario results -- confirm against the schema.
    latency50 = float(col1.strip()) * 1000
    latency90 = float(col2.strip()) * 1000
    latency99 = float(col3.strip()) * 1000

    scenario_result = {
        'scenario': {
            'name': 'netperf_tcp_rr'
        },
        'summary': {
            'latency50': latency50,
            'latency90': latency90,
            'latency99': latency99
        }
    }

    bq = big_query_utils.create_big_query()
    _create_results_table(bq, dataset_id, table_id)

    # The synthetic result has none of the nested fields that flattening
    # expects, so it is inserted as is.
    if not _insert_result(
            bq, dataset_id, table_id, scenario_result, flatten=False):
        print('Error uploading result to bigquery.')
        sys.exit(1)


def _upload_scenario_result_to_bigquery(dataset_id, table_id, result_file):
    """Uploads a JSON scenario result file as a single result row.

    The process exits with status 1 if the insert is reported as failed.

    Args:
      dataset_id: Bigquery dataset to upload to.
      table_id: Bigquery table to upload to (created if needed).
      result_file: Path of the JSON file holding one scenario result.
    """
    with open(result_file, 'r') as f:
        scenario_result = json.load(f)

    bq = big_query_utils.create_big_query()
    _create_results_table(bq, dataset_id, table_id)

    if not _insert_result(bq, dataset_id, table_id, scenario_result):
        print('Error uploading result to bigquery.')
        sys.exit(1)


def _insert_result(bq, dataset_id, table_id, scenario_result, flatten=True):
    """Adds metadata to scenario_result and inserts it as one row.

    Args:
      bq: Bigquery client obtained from big_query_utils.create_big_query().
      dataset_id: Bigquery dataset to insert into.
      table_id: Bigquery table to insert into.
      scenario_result: Result dict; modified in place.
      flatten: Whether to run _flatten_result_inplace() on the result first.

    Returns:
      The return value of big_query_utils.insert_rows(); callers treat a
      falsy value as failure.
    """
    if flatten:
        _flatten_result_inplace(scenario_result)
    _populate_metadata_inplace(scenario_result)
    # A random UUID is used as the row id.
    row = big_query_utils.make_row(str(uuid.uuid4()), scenario_result)
    return big_query_utils.insert_rows(bq, _PROJECT_ID, dataset_id, table_id,
                                       [row])


def _create_results_table(bq, dataset_id, table_id):
    """Creates the results table using the schema stored next to this script.

    Args:
      bq: Bigquery client obtained from big_query_utils.create_big_query().
      dataset_id: Bigquery dataset that holds the table.
      table_id: Bigquery table to create.

    Returns:
      The return value of big_query_utils.create_table2().
    """
    # Resolve the schema relative to the absolute script location: when
    # __file__ has no directory component, plain string concatenation would
    # produce '/scenario_result_schema.json' (the filesystem root).
    schema_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'scenario_result_schema.json')
    with open(schema_path, 'r') as f:
        table_schema = json.load(f)
    desc = 'Results of performance benchmarks.'
    return big_query_utils.create_table2(bq, _PROJECT_ID, dataset_id, table_id,
                                         table_schema, desc)


def _flatten_result_inplace(scenario_result):
    """Bigquery is not really great for handling deeply nested data
    and repeated fields. To maintain values of some fields while keeping
    the schema relatively simple, we artificially leave some of the fields
    as JSON strings.

    Args:
      scenario_result: Result dict loaded from a scenario result JSON file;
        modified in place.
    """
    scenario = scenario_result['scenario']
    scenario['clientConfig'] = json.dumps(scenario['clientConfig'])
    scenario['serverConfig'] = json.dumps(scenario['serverConfig'])
    scenario_result['latencies'] = json.dumps(scenario_result['latencies'])

    # Move the CPU time counters out of each serverStats entry into a
    # parallel top-level 'serverCpuStats' list (None when a counter is
    # missing).
    server_cpu_stats = []
    for stats in scenario_result['serverStats']:
        server_cpu_stats.append({
            'totalCpuTime': stats.pop('totalCpuTime', None),
            'idleCpuTime': stats.pop('idleCpuTime', None),
        })
    scenario_result['serverCpuStats'] = server_cpu_stats

    for stats in scenario_result['clientStats']:
        stats['latencies'] = json.dumps(stats['latencies'])
        # Per-client request results are dropped from the uploaded row.
        stats.pop('requestResults', None)

    scenario_result['serverCores'] = json.dumps(scenario_result['serverCores'])
    scenario_result['clientSuccess'] = json.dumps(
        scenario_result['clientSuccess'])
    scenario_result['serverSuccess'] = json.dumps(
        scenario_result['serverSuccess'])
    scenario_result['requestResults'] = json.dumps(
        scenario_result.get('requestResults', []))

    summary = scenario_result['summary']
    # serverCpuUsage is moved from the summary to the top level; the two
    # per-second request rates are dropped from the summary.
    scenario_result['serverCpuUsage'] = summary.pop('serverCpuUsage', None)
    summary.pop('successfulRequestsPerSecond', None)
    summary.pop('failedRequestsPerSecond', None)
    massage_qps_stats.massage_qps_stats(scenario_result)


def _populate_metadata_inplace(scenario_result):
    """Populates metadata based on environment variables set by Kokoro.

    Sets scenario_result['metadata'] to a dict that always contains
    'created' (current UTC time as a string of epoch seconds) and, for each
    environment variable that is set and non-empty, the corresponding build
    information.

    Args:
      scenario_result: Result dict; modified in place.
    """
    # NOTE: Grabbing the Kokoro environment variables will only work if the
    # driver is running locally on the same machine where Kokoro has started
    # the job. For our setup, this is currently the case, so just assume that.
    build_number = os.getenv('KOKORO_BUILD_NUMBER')
    build_id = os.getenv('KOKORO_BUILD_ID')
    # Only build the URL when the build id is known; formatting an unset
    # variable would yield a bogus '.../invocations/None' link.
    build_url = None
    if build_id:
        build_url = (
            'https://source.cloud.google.com/results/invocations/%s' %
            build_id)
    job_name = os.getenv('KOKORO_JOB_NAME')
    git_commit = os.getenv('KOKORO_GIT_COMMIT')
    # actual commit is the actual head of PR that is getting tested
    # TODO(jtattermusch): unclear how to obtain on Kokoro
    git_actual_commit = os.getenv('ghprbActualCommit')

    utc_timestamp = str(calendar.timegm(time.gmtime()))
    metadata = {'created': utc_timestamp}

    if build_number:
        metadata['buildNumber'] = build_number
    if build_url:
        metadata['buildUrl'] = build_url
    if job_name:
        metadata['jobName'] = job_name
    if git_commit:
        metadata['gitCommit'] = git_commit
    if git_actual_commit:
        metadata['gitActualCommit'] = git_actual_commit

    scenario_result['metadata'] = metadata


def _main():
    """Parses the command line and uploads the requested result file."""
    argp = argparse.ArgumentParser(description='Upload result to big query.')
    argp.add_argument(
        '--bq_result_table',
        required=True,
        type=str,
        help='Bigquery "dataset.table" to upload results to.')
    argp.add_argument(
        '--file_to_upload',
        default='scenario_result.json',
        type=str,
        help='Report file to upload.')
    argp.add_argument(
        '--file_format',
        choices=['scenario_result', 'netperf_latency_csv'],
        default='scenario_result',
        help='Format of the file to upload.')

    args = argp.parse_args()

    # Reject anything that is not exactly "<dataset>.<table>" with a usage
    # error instead of failing with an unpacking traceback.
    id_parts = args.bq_result_table.split('.')
    if len(id_parts) != 2 or not all(id_parts):
        argp.error('--bq_result_table must be of the form "dataset.table", '
                   'got %r' % args.bq_result_table)
    dataset_id, table_id = id_parts

    if args.file_format == 'netperf_latency_csv':
        _upload_netperf_latency_csv_to_bigquery(dataset_id, table_id,
                                                args.file_to_upload)
    else:
        _upload_scenario_result_to_bigquery(dataset_id, table_id,
                                            args.file_to_upload)
    print('Successfully uploaded %s to BigQuery.\n' % args.file_to_upload)


if __name__ == '__main__':
    _main()