#!/usr/bin/env python
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Measure the time between PR creation and completion of all tests.

You'll need a github API token to avoid being rate-limited. See
https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/

This script goes over one page (at most 100 entries) of the repository's open
pull requests. For PRs with a single commit, it uses the PR's creation as the
initial time; otherwise, it uses the date of the last commit. This is somewhat
fragile, and imposed by the fact that GitHub reports a PR's updated timestamp
for any event that modifies the PR (e.g. comments), not just the addition of
new commits.

In addition, it ignores latencies of five hours or more, as that's likely due
to a manual re-run of tests.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import contextlib
import json
import logging
import pprint

from datetime import datetime, timedelta

try:
    import urllib2
except ImportError:
    # Python 3: urllib.request exposes the same Request/urlopen names, so it
    # is bound to the historical module name used throughout this file.
    import urllib.request as urllib2

logging.basicConfig(format='%(asctime)s %(message)s')

PRS = 'https://api.github.com/repos/grpc/grpc/pulls?state=open&per_page=100'
COMMITS = 'https://api.github.com/repos/grpc/grpc/pulls/{pr_number}/commits'

# GitHub API token used by gh(). main() overwrites it with --token; the empty
# default means "send unauthenticated requests".
TOKEN = ''

# Substring that must appear in a status' target_url for the status to be
# attributed to the given CI system.
_TARGET_URL_MARKERS = {
    'kokoro': 'kokoro',
    'jenkins': 'grpc-testing',
}

# A PR is only reported when strictly more than this many matching statuses
# have completed. NOTE(review): presumably a proxy for "the full test suite
# ran" -- confirm the intended threshold.
_MIN_STATUS_COUNT = 15

# Latencies at or above this are skipped (see the module docstring).
_MAX_LATENCY = timedelta(hours=5)


def gh(url):
    """Fetch `url` and return the raw response body.

    An 'Authorization: token ...' header is added when the module-level TOKEN
    is non-empty.
    """
    request = urllib2.Request(url)
    if TOKEN:
        request.add_header('Authorization', 'token {}'.format(TOKEN))
    # closing() releases the connection on both Python 2 and Python 3, even
    # when read() raises.
    with contextlib.closing(urllib2.urlopen(request)) as response:
        return response.read()


def print_csv_header():
    """Print the column names matching output(..., mode='csv')."""
    print('pr,base_time,test_time,latency_seconds,successes,failures,errors')


def output(pr,
           base_time,
           test_time,
           diff_time,
           successes,
           failures,
           errors,
           mode='human'):
    """Print one PR's latency report to stdout.

    Args:
      pr: PR number.
      base_time: datetime at which the latency measurement starts.
      test_time: datetime at which the last test status was updated.
      diff_time: test_time - base_time; only printed in 'human' mode ('csv'
        mode recomputes the latency in whole seconds).
      successes: number of statuses in the 'success' state.
      failures: number of statuses in the 'failure' state.
      errors: number of statuses in the 'error' state.
      mode: 'human' or 'csv'. Any other value prints nothing.
    """
    if mode == 'human':
        print(
            "PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}."
            "\n\tSuccesses: {}, Failures: {}, Errors: {}".format(
                pr, base_time, test_time, diff_time, successes, failures,
                errors))
    elif mode == 'csv':
        print(','.join([
            str(pr),
            str(base_time),
            str(test_time),
            str(int((test_time - base_time).total_seconds())),
            str(successes),
            str(failures),
            str(errors)
        ]))


def parse_timestamp(datetime_str):
    """Parse a 'YYYY-MM-DDTHH:MM:SSZ' string into a naive datetime."""
    return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%SZ')


def to_posix_timestamp(dt):
    """Return, as a string, the seconds between 1970-01-01 00:00:00 and `dt`.

    `dt` must be a naive datetime.
    """
    return str((dt - datetime(1970, 1, 1)).total_seconds())


def get_pr_data():
    """Fetch the PRS listing and return one summary dict per pull request.

    Each dict has the keys 'number', 'created_at' and 'updated_at' (parsed
    datetimes) and 'statuses_url'.
    """
    latest_prs = json.loads(gh(PRS))
    res = [{
        'number': pr['number'],
        'created_at': parse_timestamp(pr['created_at']),
        'updated_at': parse_timestamp(pr['updated_at']),
        'statuses_url': pr['statuses_url']
    } for pr in latest_prs]
    return res


def get_commits_data(pr_number):
    """Fetch the commits of PR `pr_number`.

    Returns:
      A dict with 'num_commits' (length of the returned commit list) and
      'most_recent_date' (author date of the last commit in that list, or
      None when the list is empty).
    """
    commits = json.loads(gh(COMMITS.format(pr_number=pr_number)))
    most_recent_date = None
    if commits:
        most_recent_date = parse_timestamp(
            commits[-1]['commit']['author']['date'])
    return {
        'num_commits': len(commits),
        'most_recent_date': most_recent_date
    }


def _summarize_statuses(statuses, system):
    """Count the completed statuses of `system` among `statuses`.

    Args:
      statuses: iterable of dicts with the keys 'target_url', 'state' and
        'updated_at'.
      system: a key of _TARGET_URL_MARKERS ('kokoro' or 'jenkins').

    Returns:
      None if any matching status is still 'pending' or if no more than
      _MIN_STATUS_COUNT matching statuses were counted. Otherwise a dict with
      'latest_datetime' (most recent 'updated_at' among the matching
      statuses), 'successes', 'failures' and 'errors'.

    Raises:
      ValueError: if `system` is not a known CI system.
    """
    marker = _TARGET_URL_MARKERS.get(system)
    if marker is None:
        raise ValueError('Unknown CI system: {!r}'.format(system))
    counts = {'success': 0, 'failure': 0, 'error': 0}
    latest_datetime = None
    for status in statuses:
        target_url = status['target_url']
        if not target_url or marker not in target_url:
            continue  # Status reported by some other system.
        state = status['state']
        if state == 'pending':
            return None  # Tests have not all completed yet.
        if state in counts:
            counts[state] += 1
        updated_at = parse_timestamp(status['updated_at'])
        if latest_datetime is None or updated_at > latest_datetime:
            latest_datetime = updated_at
    if sum(counts.values()) <= _MIN_STATUS_COUNT:
        return None
    return {
        'latest_datetime': latest_datetime,
        'successes': counts['success'],
        'failures': counts['failure'],
        'errors': counts['error']
    }


def get_status_data(statuses_url, system):
    """Fetch and summarize the statuses of a PR for one CI system.

    Args:
      statuses_url: the PR's 'statuses_url'; 'statuses' is rewritten to
        'status' before the request is made.
      system: 'kokoro' or 'jenkins'.

    Returns:
      None when the response is empty, a matching status is pending, or too
      few matching statuses completed; otherwise the summary dict described
      in _summarize_statuses.

    Raises:
      ValueError: if `system` is not a known CI system.
    """
    if system not in _TARGET_URL_MARKERS:
        # Fail before spending a network request on an unusable argument.
        raise ValueError('Unknown CI system: {!r}'.format(system))
    status_url = statuses_url.replace('statuses', 'status')
    statuses = json.loads(gh(status_url + '?per_page=100'))
    if not statuses:
        return None
    return _summarize_statuses(statuses['statuses'], system)


def build_args_parser():
    """Build the command-line parser (--format, --system, --token)."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--format',
                        type=str,
                        choices=['human', 'csv'],
                        default='human',
                        help='Output format: are you a human or a machine?')
    parser.add_argument('--system',
                        type=str,
                        choices=['jenkins', 'kokoro'],
                        required=True,
                        help='Consider only the given CI system')
    parser.add_argument(
        '--token',
        type=str,
        default='',
        help='GitHub token to use its API with a higher rate limit')
    return parser


def main():
    """Print the test latency of every listed PR whose tests have completed."""
    global TOKEN
    args = build_args_parser().parse_args()
    TOKEN = args.token
    if args.format == 'csv':
        print_csv_header()
    for pr_data in get_pr_data():
        commit_data = get_commits_data(pr_data['number'])
        # PR with a single commit -> use the PRs creation time.
        # else -> use the latest commit's date.
        if commit_data['num_commits'] > 1:
            base_timestamp = commit_data['most_recent_date']
        else:
            base_timestamp = pr_data['created_at']
        last_status = get_status_data(pr_data['statuses_url'], args.system)
        if not last_status:
            continue
        diff = last_status['latest_datetime'] - base_timestamp
        if diff < _MAX_LATENCY:
            output(pr_data['number'],
                   base_timestamp,
                   last_status['latest_datetime'],
                   diff,
                   last_status['successes'],
                   last_status['failures'],
                   last_status['errors'],
                   mode=args.format)


if __name__ == '__main__':
    main()