#!/usr/bin/env python
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15"""Measure the time between PR creation and completion of all tests.
16
17You'll need a github API token to avoid being rate-limited. See
18https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/
19
20This script goes over the most recent 100 pull requests. For PRs with a single
21commit, it uses the PR's creation as the initial time; othewise, it uses the
22date of the last commit. This is somewhat fragile, and imposed by the fact that
23GitHub reports a PR's updated timestamp for any event that modifies the PR (e.g.
24comments), not just the addition of new commits.
25
26In addition, it ignores latencies greater than five hours, as that's likely due
27to a manual re-run of tests.
28"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import logging
import urllib2

from datetime import datetime, timedelta

logging.basicConfig(format='%(asctime)s %(message)s')

PRS = 'https://api.github.com/repos/grpc/grpc/pulls?state=open&per_page=100'
COMMITS = 'https://api.github.com/repos/grpc/grpc/pulls/{pr_number}/commits'

# Filled in from the --token flag in main(); empty means unauthenticated requests.
TOKEN = ''

def gh(url):
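    """Fetch a GitHub API URL, adding an auth header when TOKEN is set."""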
    request = urllib2.Request(url)
    if TOKEN:
        request.add_header('Authorization', 'token {}'.format(TOKEN))
    response = urllib2.urlopen(request)
    return response.read()


def print_csv_header():
    print('pr,base_time,test_time,latency_seconds,successes,failures,errors')


def output(pr,
           base_time,
           test_time,
           diff_time,
           successes,
           failures,
           errors,
           mode='human'):
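    """Print a single PR's latency record in human-readable or CSV form."""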
    if mode == 'human':
        print(
            "PR #{} base time: {} UTC, Tests completed at: {} UTC. Latency: {}."
            "\n\tSuccesses: {}, Failures: {}, Errors: {}".format(
                pr, base_time, test_time, diff_time, successes, failures,
                errors))
    elif mode == 'csv':
        print(','.join([
            str(pr),
            str(base_time),
            str(test_time),
            str(int((test_time - base_time).total_seconds())),
            str(successes),
            str(failures),
            str(errors)
        ]))


def parse_timestamp(datetime_str):
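    """Parse a GitHub ISO-8601 UTC timestamp string into a datetime."""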
    return datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%SZ')


def to_posix_timestamp(dt):
    return str((dt - datetime(1970, 1, 1)).total_seconds())


def get_pr_data():
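    """Fetch the latest open PRs: number, timestamps, and statuses URL."""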
    latest_prs = json.loads(gh(PRS))
    res = [{
        'number': pr['number'],
        'created_at': parse_timestamp(pr['created_at']),
        'updated_at': parse_timestamp(pr['updated_at']),
        'statuses_url': pr['statuses_url']
    } for pr in latest_prs]
    return res


def get_commits_data(pr_number):
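    """Fetch a PR's commits: their count and the last commit's author date."""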
    commits = json.loads(gh(COMMITS.format(pr_number=pr_number)))
    return {
        'num_commits': len(commits),
        'most_recent_date':
        parse_timestamp(commits[-1]['commit']['author']['date'])
    }


def get_status_data(statuses_url, system):
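    """Summarize a PR's combined status for the given CI system.

    Returns the completion time of the newest matching status and the
    success/failure/error counts, or None if anything is still pending or the
    run looks incomplete.
    """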
    status_url = statuses_url.replace('statuses', 'status')
    statuses = json.loads(gh(status_url + '?per_page=100'))
    successes = 0
    failures = 0
    errors = 0
    latest_datetime = None
    if not statuses: return None
    if system == 'kokoro': string_in_target_url = 'kokoro'
    elif system == 'jenkins': string_in_target_url = 'grpc-testing'
    for status in statuses['statuses']:
        if (not status['target_url'] or
                string_in_target_url not in status['target_url']):
            continue  # Ignore statuses from other CI systems.
        if status['state'] == 'pending': return None
        elif status['state'] == 'success': successes += 1
        elif status['state'] == 'failure': failures += 1
        elif status['state'] == 'error': errors += 1
        if not latest_datetime:
            latest_datetime = parse_timestamp(status['updated_at'])
        else:
            latest_datetime = max(latest_datetime,
                                  parse_timestamp(status['updated_at']))
    # Only report runs that look complete: more than 15 finished statuses.
    if successes + failures + errors > 15:
        return {
            'latest_datetime': latest_datetime,
            'successes': successes,
            'failures': failures,
            'errors': errors
        }
    else:
        return None


def build_args_parser():
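    """Build the argument parser: output format, CI system, GitHub token."""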
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--format',
        type=str,
        choices=['human', 'csv'],
        default='human',
        help='Output format: are you a human or a machine?')
    parser.add_argument(
        '--system',
        type=str,
        choices=['jenkins', 'kokoro'],
        required=True,
        help='Consider only the given CI system')
    parser.add_argument(
        '--token',
        type=str,
        default='',
        help='GitHub token to use its API with a higher rate limit')
    return parser


def main():
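    """Report test latency for the most recent open PRs on grpc/grpc."""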
    global TOKEN
    args_parser = build_args_parser()
    args = args_parser.parse_args()
    TOKEN = args.token
    if args.format == 'csv': print_csv_header()
    for pr_data in get_pr_data():
        commit_data = get_commits_data(pr_data['number'])
        # PR with a single commit -> use the PR's creation time;
        # otherwise -> use the latest commit's date.
        if commit_data['num_commits'] > 1:
            base_timestamp = commit_data['most_recent_date']
        else:
            base_timestamp = pr_data['created_at']
        last_status = get_status_data(pr_data['statuses_url'], args.system)
        if last_status:
            diff = last_status['latest_datetime'] - base_timestamp
            # Latencies above five hours are likely due to manual test re-runs.
            if diff < timedelta(hours=5):
                output(
                    pr_data['number'],
                    base_timestamp,
                    last_status['latest_datetime'],
                    diff,
                    last_status['successes'],
                    last_status['failures'],
                    last_status['errors'],
                    mode=args.format)


if __name__ == '__main__':
    main()