#!/usr/bin/env python
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Measure the time between PR creation and completion of all tests.

You'll need a GitHub API token to avoid being rate-limited. See
https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/

This script goes over up to 100 open pull requests (a single page of the
GitHub listing). For PRs with a single commit, it uses the PR's creation as the
initial time; otherwise, it uses the date of the last commit. This is somewhat
fragile, and imposed by the fact that GitHub reports a PR's updated timestamp
for any event that modifies the PR (e.g. comments), not just the addition of
new commits.

In addition, it ignores latencies of five hours or more, as that's likely due
to a manual re-run of tests.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import contextlib
import json
import logging
import pprint

try:
    import urllib2
except ImportError:
    # Python 3: urllib.request provides the same Request/urlopen API.
    import urllib.request as urllib2

from datetime import datetime, timedelta

logging.basicConfig(format='%(asctime)s %(message)s')

PRS = 'https://api.github.com/repos/grpc/grpc/pulls?state=open&per_page=100'
COMMITS = 'https://api.github.com/repos/grpc/grpc/pulls/{pr_number}/commits'

# GitHub API token. Empty means unauthenticated requests; main() overwrites it
# from the --token flag. Defined here so gh() is usable before main() runs.
TOKEN = ''

# Substring of a status' target_url that identifies each supported CI system.
_TARGET_URL_MARKERS = {
    'kokoro': 'kokoro',
    'jenkins': 'grpc-testing',
}

# A PR is only reported when more than this many statuses of the selected CI
# system have finished (success, failure or error).
# NOTE(review): presumably filters out PRs where only part of the test suite
# has reported so far -- confirm the intended threshold.
_MIN_FINISHED_STATUSES = 15

# Latencies at or above this value are skipped by main().
_MAX_LATENCY = timedelta(hours=5)


def gh(url):
    """Fetch `url` from the GitHub API and return the response body as text.

    Sends an `Authorization: token ...` header when the module-level TOKEN is
    non-empty.

    Args:
        url: Full URL to request.

    Returns:
        The response body, decoded as UTF-8.

    Raises:
        Whatever `urlopen` raises for network or HTTP errors.
    """
    request = urllib2.Request(url)
    if TOKEN:
        request.add_header('Authorization', 'token {}'.format(TOKEN))
    # closing() releases the connection on both Python 2 and Python 3.
    with contextlib.closing(urllib2.urlopen(request)) as response:
        return response.read().decode('utf-8')


def print_csv_header():
    """Print the column names matching the rows emitted by output(mode='csv')."""
    print('pr,base_time,test_time,latency_seconds,successes,failures,errors')


def output(pr,
           base_time,
           test_time,
           diff_time,
           successes,
           failures,
           errors,
           mode='human'):
    """Print the latency data of a single PR.

    Args:
        pr: PR number.
        base_time: datetime used as the start of the measurement.
        test_time: datetime at which the last test status was updated.
        diff_time: Latency to display in 'human' mode.
        successes: Number of successful statuses.
        failures: Number of failed statuses.
        errors: Number of errored statuses.
        mode: 'human' for a readable message, 'csv' for a comma-separated
            row. Any other value prints nothing.
    """
    if mode == 'human':
        print(
            "PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}."
            "\n\tSuccesses: {}, Failures: {}, Errors: {}".format(
                pr, base_time, test_time, diff_time, successes, failures,
                errors))
    elif mode == 'csv':
        # The latency column is recomputed from the two timestamps and
        # truncated to whole seconds.
        latency_seconds = int((test_time - base_time).total_seconds())
        fields = (pr, base_time, test_time, latency_seconds, successes,
                  failures, errors)
        print(','.join(str(field) for field in fields))


def parse_timestamp(datetime_str):
    """Parse a 'YYYY-MM-DDTHH:MM:SSZ' timestamp into a naive datetime."""
    return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%SZ')


def to_posix_timestamp(dt):
    """Return the seconds between 1970-01-01 and naive `dt`, as a string."""
    return str((dt - datetime(1970, 1, 1)).total_seconds())


def get_pr_data():
    """Fetch the PR listing at PRS.

    Returns:
        A list with one dict per PR, holding its 'number', parsed
        'created_at' and 'updated_at' datetimes, and its 'statuses_url'.
    """
    latest_prs = json.loads(gh(PRS))
    return [{
        'number': pr['number'],
        'created_at': parse_timestamp(pr['created_at']),
        'updated_at': parse_timestamp(pr['updated_at']),
        'statuses_url': pr['statuses_url']
    } for pr in latest_prs]


def get_commits_data(pr_number):
    """Fetch commit information for the given PR.

    Args:
        pr_number: Number of the PR to query.

    Returns:
        A dict with 'num_commits' (number of commits in the fetched page) and
        'most_recent_date' (author date of the last commit in that page, or
        None when the PR reports no commits).
    """
    # Ask for the largest page so the last element is the newest commit for
    # PRs of up to 100 commits; no further pages are fetched.
    url = COMMITS.format(pr_number=pr_number) + '?per_page=100'
    commits = json.loads(gh(url))
    if not commits:
        return {'num_commits': 0, 'most_recent_date': None}
    return {
        'num_commits':
            len(commits),
        'most_recent_date':
            parse_timestamp(commits[-1]['commit']['author']['date'])
    }


def get_status_data(statuses_url, system):
    """Summarize the finished test statuses of a PR for one CI system.

    Args:
        statuses_url: The PR's 'statuses_url' as returned by GitHub.
        system: CI system to consider: 'kokoro' or 'jenkins'.

    Returns:
        None when the response is empty, when any status of `system` is
        still pending, or when no more than _MIN_FINISHED_STATUSES statuses
        of `system` have finished. Otherwise a dict with 'latest_datetime'
        (most recent 'updated_at' among the counted statuses) and the
        'successes', 'failures' and 'errors' counts.

    Raises:
        ValueError: If `system` is not a supported CI system.
    """
    # Validate up front so an unsupported system fails before any request.
    try:
        string_in_target_url = _TARGET_URL_MARKERS[system]
    except KeyError:
        raise ValueError('Unknown CI system: {!r}'.format(system))

    # NOTE(review): presumably this rewrites the URL to the combined-status
    # endpoint, whose response carries a 'statuses' list -- confirm.
    status_url = statuses_url.replace('statuses', 'status')
    combined = json.loads(gh(status_url + '?per_page=100'))
    if not combined:
        return None

    successes = 0
    failures = 0
    errors = 0
    latest_datetime = None
    for status in combined['statuses']:
        target_url = status['target_url']
        if not target_url or string_in_target_url not in target_url:
            continue  # Not reported by the requested CI system.
        state = status['state']
        if state == 'pending':
            # Tests are still running: no completion time to report yet.
            return None
        elif state == 'success':
            successes += 1
        elif state == 'failure':
            failures += 1
        elif state == 'error':
            errors += 1
        updated_at = parse_timestamp(status['updated_at'])
        if latest_datetime is None or updated_at > latest_datetime:
            latest_datetime = updated_at

    if successes + failures + errors > _MIN_FINISHED_STATUSES:
        return {
            'latest_datetime': latest_datetime,
            'successes': successes,
            'failures': failures,
            'errors': errors
        }
    return None


def build_args_parser():
    """Build the command-line parser (--format, --system, --token)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--format',
                        type=str,
                        choices=['human', 'csv'],
                        default='human',
                        help='Output format: are you a human or a machine?')
    parser.add_argument('--system',
                        type=str,
                        choices=['jenkins', 'kokoro'],
                        required=True,
                        help='Consider only the given CI system')
    parser.add_argument(
        '--token',
        type=str,
        default='',
        help='GitHub token to use its API with a higher rate limit')
    return parser


def main():
    """Print the test latency of every listed PR whose tests have completed."""
    global TOKEN
    args = build_args_parser().parse_args()
    TOKEN = args.token
    if args.format == 'csv':
        print_csv_header()
    for pr_data in get_pr_data():
        commit_data = get_commits_data(pr_data['number'])
        # PR with a single commit -> use the PR's creation time.
        # else -> use the latest commit's date.
        if commit_data['num_commits'] > 1:
            base_timestamp = commit_data['most_recent_date']
        else:
            base_timestamp = pr_data['created_at']
        last_status = get_status_data(pr_data['statuses_url'], args.system)
        if not last_status:
            continue
        diff = last_status['latest_datetime'] - base_timestamp
        # Long latencies are likely due to a manual re-run of tests.
        if diff < _MAX_LATENCY:
            output(pr_data['number'],
                   base_timestamp,
                   last_status['latest_datetime'],
                   diff,
                   last_status['successes'],
                   last_status['failures'],
                   last_status['errors'],
                   mode=args.format)


if __name__ == '__main__':
    main()