#!/usr/bin/env python
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15"""Measure the time between PR creation and completion of all tests.
16
17You'll need a github API token to avoid being rate-limited. See
18https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/
19
20This script goes over the most recent 100 pull requests. For PRs with a single
21commit, it uses the PR's creation as the initial time; otherwise, it uses the
22date of the last commit. This is somewhat fragile, and imposed by the fact that
23GitHub reports a PR's updated timestamp for any event that modifies the PR (e.g.
24comments), not just the addition of new commits.
25
26In addition, it ignores latencies greater than five hours, as that's likely due
27to a manual re-run of tests.
28"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import logging
import pprint
import urllib2

from datetime import datetime, timedelta

logging.basicConfig(format='%(asctime)s %(message)s')

PRS = 'https://api.github.com/repos/grpc/grpc/pulls?state=open&per_page=100'
COMMITS = 'https://api.github.com/repos/grpc/grpc/pulls/{pr_number}/commits'


def gh(url):
    """Fetch `url` from the GitHub API, authenticating with TOKEN if set."""
    request = urllib2.Request(url)
    if TOKEN:
        request.add_header('Authorization', 'token {}'.format(TOKEN))
    response = urllib2.urlopen(request)
    return response.read()


def print_csv_header():
    print('pr,base_time,test_time,latency_seconds,successes,failures,errors')


def output(pr,
           base_time,
           test_time,
           diff_time,
           successes,
           failures,
           errors,
           mode='human'):
    if mode == 'human':
        print(
            "PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}."
            "\n\tSuccesses: {}, Failures: {}, Errors: {}".format(
                pr, base_time, test_time, diff_time, successes, failures,
                errors))
    elif mode == 'csv':
        print(','.join([
            str(pr),
            str(base_time),
            str(test_time),
            str(int((test_time - base_time).total_seconds())),
            str(successes),
            str(failures),
            str(errors)
        ]))


def parse_timestamp(datetime_str):
    return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%SZ')


def to_posix_timestamp(dt):
    return str((dt - datetime(1970, 1, 1)).total_seconds())


def get_pr_data():
    latest_prs = json.loads(gh(PRS))
    res = [{
        'number': pr['number'],
        'created_at': parse_timestamp(pr['created_at']),
        'updated_at': parse_timestamp(pr['updated_at']),
        'statuses_url': pr['statuses_url']
    } for pr in latest_prs]
    return res


def get_commits_data(pr_number):
    commits = json.loads(gh(COMMITS.format(pr_number=pr_number)))
    return {
        'num_commits':
            len(commits),
        'most_recent_date':
            parse_timestamp(commits[-1]['commit']['author']['date'])
    }


def get_status_data(statuses_url, system):
    """Summarize the CI statuses of a PR for the given system.

    Returns a dict with the completion time and the success/failure/error
    counts, or None if the test run appears to be incomplete.
    """
    status_url = statuses_url.replace('statuses', 'status')
    statuses = json.loads(gh(status_url + '?per_page=100'))
    successes = 0
    failures = 0
    errors = 0
    latest_datetime = None
    if not statuses:
        return None
    if system == 'kokoro':
        string_in_target_url = 'kokoro'
    elif system == 'jenkins':
        string_in_target_url = 'grpc-testing'
    for status in statuses['statuses']:
        if not status['target_url'] or string_in_target_url not in status[
                'target_url']:
            continue  # Skip statuses that belong to other CI systems.
        if status['state'] == 'pending':
            return None
        elif status['state'] == 'success':
            successes += 1
        elif status['state'] == 'failure':
            failures += 1
        elif status['state'] == 'error':
            errors += 1
        # The first status is the most recent one, but take the max to be safe.
        if not latest_datetime:
            latest_datetime = parse_timestamp(status['updated_at'])
        else:
            latest_datetime = max(latest_datetime,
                                  parse_timestamp(status['updated_at']))
    # Only report runs that look complete: more than 15 finished statuses.
    if any([successes, failures, errors
           ]) and sum([successes, failures, errors]) > 15:
        return {
            'latest_datetime': latest_datetime,
            'successes': successes,
            'failures': failures,
            'errors': errors
        }
    else:
        return None


def build_args_parser():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--format',
                        type=str,
                        choices=['human', 'csv'],
                        default='human',
                        help='Output format: are you a human or a machine?')
    parser.add_argument('--system',
                        type=str,
                        choices=['jenkins', 'kokoro'],
                        required=True,
                        help='Consider only the given CI system')
    parser.add_argument(
        '--token',
        type=str,
        default='',
        help='GitHub token to use its API with a higher rate limit')
    return parser


def main():
    import sys
    global TOKEN
    args_parser = build_args_parser()
    args = args_parser.parse_args()
    TOKEN = args.token
    if args.format == 'csv':
        print_csv_header()
    for pr_data in get_pr_data():
        commit_data = get_commits_data(pr_data['number'])
        # PR with a single commit -> use the PR's creation time;
        # otherwise -> use the latest commit's date.
        if commit_data['num_commits'] > 1:
            base_timestamp = commit_data['most_recent_date']
        else:
            base_timestamp = pr_data['created_at']
        last_status = get_status_data(pr_data['statuses_url'], args.system)
        if last_status:
            diff = last_status['latest_datetime'] - base_timestamp
            if diff < timedelta(hours=5):
                output(pr_data['number'],
                       base_timestamp,
                       last_status['latest_datetime'],
                       diff,
                       last_status['successes'],
                       last_status['failures'],
                       last_status['errors'],
                       mode=args.format)


if __name__ == '__main__':
    main()
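
# A minimal sketch (not part of the original script) of consuming the CSV
# output, assuming it was redirected to a file named 'latencies.csv':
#
#     import csv
#     with open('latencies.csv') as f:
#         rows = list(csv.DictReader(f))
#     latencies = sorted(int(row['latency_seconds']) for row in rows)
#     if latencies:
#         print('median latency (s):', latencies[len(latencies) // 2])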