• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2
3# Copyright 2014 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import argparse
8import datetime
9import getpass
10import json
11import os
12import smtplib
13import sys
14import time
15import urllib
16import urllib2
17
class Emailer:
  """Sends notification emails through the Gmail SMTP server.

  The account password is read once at construction time, either from a file
  or, when no such file exists, from an interactive prompt.
  """
  DEFAULT_EMAIL_PASSWORD_FILE = '.email_password'
  GMAIL_SMTP_SERVER = 'smtp.gmail.com:587'
  SUBJECT = 'Chrome GPU Bots Notification'

  def __init__(self, email_from, email_to, email_password_file):
    """Stores the addresses and loads the password.

    Args:
      email_from: Address used both as the sender and as the SMTP login name.
      email_to: List of recipient addresses (joined with ',' in the To header).
      email_password_file: Path of the password file, or None to use
          DEFAULT_EMAIL_PASSWORD_FILE.
    """
    self.email_from = email_from
    self.email_to = email_to
    self.email_password = Emailer._getEmailPassword(email_password_file)

  @staticmethod
  def format_email_body(time_str, offline_str, failed_str, noteworthy_str):
    """Returns the four report sections concatenated in the given order."""
    return '%s%s%s%s' % (time_str, offline_str, failed_str, noteworthy_str)

  def _openAuthenticatedSession(self):
    """Connects to the SMTP server, upgrades to TLS and logs in.

    Returns:
      The connected smtplib.SMTP object. The caller must close it.

    Raises:
      Whatever smtplib raises; the socket is closed before re-raising so a
      failed handshake or login does not leak the connection.
    """
    server = smtplib.SMTP(Emailer.GMAIL_SMTP_SERVER)
    try:
      server.starttls()
      server.login(self.email_from, self.email_password)
    except Exception:
      server.close()
      raise
    return server

  def send_email(self, body):
    """Sends |body| to all recipients; errors are printed, never raised."""
    message = 'From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s' % (self.email_from,
            ','.join(self.email_to), Emailer.SUBJECT, body)

    try:
      server = self._openAuthenticatedSession()
      try:
        server.sendmail(self.email_from, self.email_to, message)
        server.quit()
      finally:
        # close() is a no-op after a successful quit(), but releases the
        # socket when sendmail() or quit() raised.
        server.close()
    except Exception as e:
      print('Error sending email: %s' % str(e))

  def testEmailLogin(self):
    """Verifies the credentials by logging in; raises on any failure."""
    server = self._openAuthenticatedSession()
    try:
      server.quit()
    finally:
      server.close()

  @staticmethod
  def _getEmailPassword(email_password_file):
    """Returns the account password.

    Reads and strips the contents of |email_password_file| (or of
    DEFAULT_EMAIL_PASSWORD_FILE when it is None). If that file does not exist
    the user is prompted instead.
    """
    password_file = (email_password_file if email_password_file is not None
            else Emailer.DEFAULT_EMAIL_PASSWORD_FILE)

    if os.path.isfile(password_file):
      with open(password_file, 'r') as f:
        return f.read().strip()

    return getpass.getpass(
            'Please enter email password for source email account: ')
66
class GpuBot:
  """Record describing one bot on one waterfall.

  Holds the bot's identity, the raw |bot_data| handed to the constructor, and
  optional details (end time of the last build, failure text, URLs) that are
  filled in later. Instances can be converted to and from plain dictionaries.
  """

  def __init__(self, waterfall_name, bot_name, bot_data):
    self.waterfall_name = waterfall_name
    self.bot_name = bot_name
    self.bot_data = bot_data
    self._end_time = None
    self._hours_since_last_run = None
    self.failure_string = None
    self.bot_url = None
    self.build_url = None

  def getEndTime(self):
    """Returns the stored end time, or None if it was never set."""
    return self._end_time

  def setEndTime(self, end_time):
    """Stores |end_time| and recomputes the hours elapsed since then."""
    self._end_time = end_time
    now = time.localtime()
    self._hours_since_last_run = roughTimeDiffInHours(end_time, now)

  def getHoursSinceLastRun(self):
    """Returns the hours computed by setEndTime(), or None."""
    return self._hours_since_last_run

  def toDict(self):
    """Returns a dictionary holding every attribute that is not None.

    |bot_data| is never included. The end time and the elapsed hours are
    emitted together, and only when an end time is present.
    """
    result = {'waterfall_name': self.waterfall_name,
              'bot_name': self.bot_name}

    if self._end_time is not None:
      result['end_time'] = serialTime(self._end_time)
      result['hours_since_last_run'] = self._hours_since_last_run

    # These attributes are stored under their own names.
    for key in ('failure_string', 'bot_url', 'build_url'):
      value = getattr(self, key)
      if value is not None:
        result[key] = value

    return result

  @staticmethod
  def fromDict(dict):
    """Builds a GpuBot (with bot_data None) from a toDict()-style dictionary."""
    gpu_bot = GpuBot(dict['waterfall_name'], dict['bot_name'], None)

    if 'end_time' in dict:
      gpu_bot._end_time = unserializeTime(dict['end_time'])

    # (dictionary key, attribute name) pairs copied over verbatim.
    copied = (('hours_since_last_run', '_hours_since_last_run'),
              ('failure_string', 'failure_string'),
              ('bot_url', 'bot_url'),
              ('build_url', 'build_url'))

    for key, attribute in copied:
      if key in dict:
        setattr(gpu_bot, attribute, dict[key])

    return gpu_bot
127
def errorNoMostRecentBuild(waterfall_name, bot_name):
  """Prints a notice that no recent build was found for the given bot."""
  bot_id = (waterfall_name, bot_name)
  print('No most recent build available: %s::%s' % bot_id)
130
class Waterfall:
  """Static helpers that query the waterfall JSON endpoints for GPU bots."""
  BASE_URL = 'http://build.chromium.org/p/'
  BASE_BUILD_URL = BASE_URL + '%s/builders/%s'
  SPECIFIC_BUILD_URL = BASE_URL + '%s/builders/%s/builds/%s'
  BASE_JSON_BUILDERS_URL = BASE_URL + '%s/json/builders'
  BASE_JSON_BUILDS_URL = BASE_URL + '%s/json/builders/%s/builds'
  # Waterfalls on which every builder is checked.
  REGULAR_WATERFALLS = ['chromium.gpu',
          'tryserver.chromium.gpu',
          'chromium.gpu.fyi']
  WEBKIT_GPU_BOTS = ['GPU Win Builder',
          'GPU Win Builder (dbg)',
          'GPU Win7 (NVIDIA)',
          'GPU Win7 (dbg) (NVIDIA)',
          'GPU Mac Builder',
          'GPU Mac Builder (dbg)',
          'GPU Mac10.7',
          'GPU Mac10.7 (dbg)',
          'GPU Linux Builder',
          'GPU Linux Builder (dbg)',
          'GPU Linux (NVIDIA)',
          'GPU Linux (dbg) (NVIDIA)']
  # (waterfall, builder names) pairs for which only the named builders are
  # requested.
  FILTERED_WATERFALLS = [('chromium.webkit', WEBKIT_GPU_BOTS)]

  @staticmethod
  def getJsonFromUrl(url):
    """Fetches |url| and returns its body parsed as JSON.

    The connection is closed even when reading or parsing raises.
    """
    conn = urllib2.urlopen(url)
    try:
      return json.loads(conn.read())
    finally:
      conn.close()

  @staticmethod
  def getBuildersJsonForWaterfall(waterfall):
    """Returns the builders JSON for every builder on |waterfall|."""
    querystring = '?filter'
    return (Waterfall.getJsonFromUrl((Waterfall.BASE_JSON_BUILDERS_URL + '%s')
        % (waterfall, querystring)))

  @staticmethod
  def getLastNBuildsForBuilder(n, waterfall, builder):
    """Returns the JSON for the |n| most recent builds of |builder|.

    Builds are requested with the relative selectors -1 .. -n. Returns an
    empty dictionary without contacting the server when n <= 0.
    """
    if n <= 0:
      return {}

    selectors = ''.join('select=-%d&' % (i + 1) for i in range(n))
    querystring = '?' + selectors + 'filter'

    return Waterfall.getJsonFromUrl((Waterfall.BASE_JSON_BUILDS_URL + '%s') %
            (waterfall, urllib.quote(builder), querystring))

  @staticmethod
  def getFilteredBuildersJsonForWaterfall(waterfall, filter):
    """Returns the builders JSON restricted to the bot names in |filter|."""
    selectors = ''.join('select=%s&' % urllib.quote(bot_name)
            for bot_name in filter)
    querystring = '?' + selectors + 'filter'

    return Waterfall.getJsonFromUrl((Waterfall.BASE_JSON_BUILDERS_URL + '%s')
            % (waterfall, querystring))

  @staticmethod
  def getAllGpuBots():
    """Returns {waterfall name: builders JSON} for all watched waterfalls."""
    allbots = {k: Waterfall.getBuildersJsonForWaterfall(k)
            for k in Waterfall.REGULAR_WATERFALLS}

    filteredbots = {k[0]:
            Waterfall.getFilteredBuildersJsonForWaterfall(k[0], k[1])
            for k in Waterfall.FILTERED_WATERFALLS}

    allbots.update(filteredbots)

    return allbots

  @staticmethod
  def _makeGpuBot(waterfall_name, bot_name, bot):
    """Returns a GpuBot for |bot| with its builder page URL filled in."""
    gpu_bot = GpuBot(waterfall_name, bot_name, bot)
    gpu_bot.bot_url = Waterfall.BASE_BUILD_URL % (waterfall_name,
            urllib.quote(bot_name))
    return gpu_bot

  @staticmethod
  def getOfflineBots(bots):
    """Returns a GpuBot for every bot whose 'state' is 'offline'.

    Args:
      bots: {waterfall name: {bot name: bot JSON}} as returned by
          getAllGpuBots().

    The end time of the most recently completed build is recorded on each
    returned bot when it is available; otherwise a notice is printed.
    """
    offline_bots = []

    for waterfall_name in bots:
      waterfall = bots[waterfall_name]

      for bot_name in waterfall:
        bot = waterfall[bot_name]

        if bot['state'] != 'offline':
          continue

        gpu_bot = Waterfall._makeGpuBot(waterfall_name, bot_name, bot)

        most_recent_build = Waterfall.getMostRecentlyCompletedBuildForBot(
                gpu_bot)

        if (most_recent_build and 'times' in most_recent_build and
                most_recent_build['times']):
          # Index 1 is used as the end timestamp of the build.
          gpu_bot.setEndTime(time.localtime(most_recent_build['times'][1]))
        else:
          errorNoMostRecentBuild(waterfall_name, bot_name)

        offline_bots.append(gpu_bot)

    return offline_bots

  @staticmethod
  def _firstCompletedBuild(builds, num_builds):
    """Returns the newest entry of |builds| that has a non-None 'results'.

    |builds| is keyed by '-1' .. '-num_builds'. Keys that are absent, or whose
    value is not a dictionary, are skipped rather than raising, so a builder
    with fewer than |num_builds| builds is handled. Returns None when no entry
    qualifies.
    """
    for i in range(num_builds):
      build = builds.get('-%d' % (i + 1))

      if isinstance(build, dict) and build.get('results') is not None:
        return build

    return None

  @staticmethod
  def getMostRecentlyCompletedBuildForBot(bot):
    """Returns the most recently completed build of |bot|, or None.

    The result is cached in bot.bot_data['most_recent_build'] (when bot_data
    is not None) so that repeated calls do not hit the server again.
    """
    if bot.bot_data is not None and 'most_recent_build' in bot.bot_data:
      return bot.bot_data['most_recent_build']

    # Unfortunately, the JSON API doesn't provide a "most recent completed
    # build" call. We just have to get some number of the most recent (including
    # current, in-progress builds) and give up if that's not enough.
    NUM_BUILDS = 10
    builds = Waterfall.getLastNBuildsForBuilder(NUM_BUILDS, bot.waterfall_name,
            bot.bot_name)

    completed_build = Waterfall._firstCompletedBuild(builds, NUM_BUILDS)

    if completed_build is not None and bot.bot_data is not None:
      bot.bot_data['most_recent_build'] = completed_build

    return completed_build

  @staticmethod
  def getFailedBots(bots):
    """Returns a GpuBot for every bot whose last completed build failed.

    A build counts as failed when 'failed' is an element of its 'text' list.
    The failure text and the URL of that build are recorded on the bot. A
    notice is printed for bots that have no completed build at all.
    """
    failed_bots = []

    for waterfall_name in bots:
      waterfall = bots[waterfall_name]

      for bot_name in waterfall:
        bot = waterfall[bot_name]
        gpu_bot = Waterfall._makeGpuBot(waterfall_name, bot_name, bot)

        most_recent_build = Waterfall.getMostRecentlyCompletedBuildForBot(
                gpu_bot)

        # A missing or None 'text' is treated like an empty one instead of
        # raising on the membership test.
        text = most_recent_build.get('text') if most_recent_build else None

        if text and 'failed' in text:
          gpu_bot.failure_string = ' '.join(text)
          gpu_bot.build_url = Waterfall.SPECIFIC_BUILD_URL % (waterfall_name,
                  urllib.quote(bot_name), most_recent_build['number'])
          failed_bots.append(gpu_bot)
        elif not most_recent_build:
          errorNoMostRecentBuild(waterfall_name, bot_name)

    return failed_bots
287
def formatTime(t):
  """Formats the time tuple |t| as e.g. 'Thu, 02 Jan 2014 03:04:05'."""
  display_format = "%a, %d %b %Y %H:%M:%S"
  return time.strftime(display_format, t)
290
def roughTimeDiffInHours(t1, t2):
  """Returns the absolute difference between two time tuples, in hours.

  Only the year, month, day, hour, minute and second fields of |t1| and |t2|
  are taken into account.
  """
  first, second = [
      datetime.datetime(t.tm_year, t.tm_mon, t.tm_mday,
                        t.tm_hour, t.tm_min, t.tm_sec)
      for t in (t1, t2)]

  elapsed_seconds = (first - second).total_seconds()

  return abs(float(elapsed_seconds) / 3600.0)
303
def getBotStr(bot):
  """Returns a multi-line, human readable description of |bot|.

  The first line identifies the bot; one further line is added for each of the
  failure string, last build end time, bot URL and build URL that is not None.
  The description ends with an empty line.
  """
  pieces = ['  %s::%s\n' % (bot.waterfall_name, bot.bot_name)]

  if bot.failure_string is not None:
    pieces.append('  failure: %s\n' % bot.failure_string)

  end_time = bot.getEndTime()
  if end_time is not None:
    pieces.append('  last build end time: %s (roughly %f hours ago)\n' %
        (formatTime(end_time), bot.getHoursSinceLastRun()))

  if bot.bot_url is not None:
    pieces.append('  bot url: %s\n' % bot.bot_url)

  if bot.build_url is not None:
    pieces.append('  build url: %s\n' % bot.build_url)

  pieces.append('\n')
  return ''.join(pieces)
322
def getBotsStr(bots):
  """Returns the descriptions of all |bots| followed by an empty line."""
  descriptions = [getBotStr(bot) for bot in bots]
  return ''.join(descriptions) + '\n'
331
def getOfflineBotsStr(offline_bots):
  """Returns the 'Offline bots:' report section for |offline_bots|."""
  listing = getBotsStr(offline_bots)
  return 'Offline bots:\n%s' % listing
334
def getFailedBotsStr(failed_bots):
  """Returns the 'Failed bots:' report section for |failed_bots|."""
  listing = getBotsStr(failed_bots)
  return 'Failed bots:\n%s' % listing
337
def getBotDicts(bots):
  """Returns a list with the toDict() result of every bot in |bots|."""
  return [bot.toDict() for bot in bots]
345
def unserializeTime(t):
  """Rebuilds a time.struct_time from a serialTime()-style dictionary.

  The weekday, day-of-year and DST fields are not stored in |t| and are set
  to 0.
  """
  date_and_clock = [t[key] for key in
                    ('year', 'mon', 'day', 'hour', 'min', 'sec')]
  return time.struct_time(tuple(date_and_clock) + (0, 0, 0))
349
def serialTime(t):
  """Returns the date and clock fields of time tuple |t| as a dictionary."""
  serialized = {}
  serialized['year'] = t.tm_year
  serialized['mon'] = t.tm_mon
  serialized['day'] = t.tm_mday
  serialized['hour'] = t.tm_hour
  serialized['min'] = t.tm_min
  serialized['sec'] = t.tm_sec
  return serialized
353
def getSummary(offline_bots, failed_bots):
  """Returns both bot lists converted to dictionaries, keyed by category."""
  return {
      'offline': getBotDicts(offline_bots),
      'failed': getBotDicts(failed_bots),
  }
358
def findBot(name, lst):
  """Returns the first bot in |lst| whose bot_name equals |name|, or None."""
  matches = (candidate for candidate in lst if candidate.bot_name == name)
  return next(matches, None)
365
def getNoteworthyEvents(offline_bots, failed_bots, previous_results):
  """Compares the current bot lists with the previous run's lists.

  Bots are identified by (waterfall_name, bot_name), because the same builder
  name can exist on more than one waterfall.

  Args:
    offline_bots: Bots that are currently offline.
    failed_bots: Bots whose last completed build failed.
    previous_results: Dictionary that may hold the 'offline' and 'failed' bot
        lists of the previous run; missing keys count as empty lists.

  Returns:
    A dictionary with four bot lists:
      'offline': offline for at least CRITICAL_NUM_HOURS now, and previously
          either not listed as offline or listed below that threshold.
      'failed': failing now but not in the previous failures.
      'recovered_offline': previously offline for at least CRITICAL_NUM_HOURS
          and no longer offline.
      'recovered_failures': previously failing and no longer failing.
  """
  CRITICAL_NUM_HOURS = 1.0

  def findSameBot(bot, candidates):
    # Match on waterfall and name together.
    for candidate in candidates:
      if (candidate.waterfall_name == bot.waterfall_name and
              candidate.bot_name == bot.bot_name):
        return candidate

    return None

  def offlineTooLong(bot):
    # An unknown (None) duration never counts as reaching the threshold.
    hours = bot.getHoursSinceLastRun()
    return hours is not None and hours >= CRITICAL_NUM_HOURS

  previous_offline = previous_results.get('offline', [])
  previous_failures = previous_results.get('failed', [])

  noteworthy_offline = []
  for bot in offline_bots:
    if not offlineTooLong(bot):
      continue

    previous_bot = findSameBot(bot, previous_offline)

    if previous_bot is None or not offlineTooLong(previous_bot):
      noteworthy_offline.append(bot)

  noteworthy_new_failures = [bot for bot in failed_bots
          if findSameBot(bot, previous_failures) is None]

  noteworthy_new_offline_recoveries = [bot for bot in previous_offline
          if offlineTooLong(bot) and findSameBot(bot, offline_bots) is None]

  noteworthy_new_failure_recoveries = [bot for bot in previous_failures
          if findSameBot(bot, failed_bots) is None]

  return {'offline': noteworthy_offline, 'failed': noteworthy_new_failures,
          'recovered_failures': noteworthy_new_failure_recoveries,
          'recovered_offline': noteworthy_new_offline_recoveries}
410
def getNoteworthyStr(noteworthy_events):
  """Returns the 'IMPORTANT ...' report sections for |noteworthy_events|.

  A section is emitted for each non-empty category, in a fixed order, and
  consists of a heading, the description of every bot in the category and an
  empty line. Returns '' when every category is empty.
  """
  sections = (
      ('offline', 'IMPORTANT bots newly offline for over an hour:\n'),
      ('failed', 'IMPORTANT new failing bots:\n'),
      ('recovered_offline',
       'IMPORTANT newly recovered previously offline bots:\n'),
      ('recovered_failures', 'IMPORTANT newly recovered failing bots:\n'),
  )

  pieces = []

  for category, heading in sections:
    bots = noteworthy_events[category]

    if not bots:
      continue

    pieces.append(heading)
    pieces.extend(getBotStr(bot) for bot in bots)
    pieces.append('\n')

  return ''.join(pieces)
447
def dictsToBots(bots):
  """Converts the 'offline' and 'failed' dictionary lists into GpuBot lists."""
  return {
      'offline': [GpuBot.fromDict(entry) for entry in bots['offline']],
      'failed': [GpuBot.fromDict(entry) for entry in bots['failed']],
  }
458
459class GpuBotPoller:
460  DEFAULT_PREVIOUS_RESULTS_FILE = '.check_gpu_bots_previous_results'
461
462  def __init__(self, emailer, send_email_for_recovered_offline_bots,
463          send_email_for_recovered_failing_bots, send_email_on_error,
464          previous_results_file):
465    self.emailer = emailer
466
467    self.send_email_for_recovered_offline_bots = \
468            send_email_for_recovered_offline_bots
469
470    self.send_email_for_recovered_failing_bots = \
471            send_email_for_recovered_failing_bots
472
473    self.send_email_on_error = send_email_on_error
474    self.previous_results_file = previous_results_file
475
476  def shouldEmail(self, noteworthy_events):
477    if noteworthy_events['offline'] or noteworthy_events['failed']:
478      return True
479
480    if (self.send_email_for_recovered_offline_bots and
481            noteworthy_events['recovered_offline']):
482      return True
483
484    if (self.send_email_for_recovered_failing_bots and
485          noteworthy_events['recovered_failures']):
486      return True
487
488    return False
489
490  def writeResults(self, summary):
491    results_file = (self.previous_results_file
492            if self.previous_results_file is not None
493            else GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE)
494
495    with open(results_file, 'w') as f:
496      f.write(json.dumps(summary))
497
498  def getPreviousResults(self):
499    previous_results_file = (self.previous_results_file
500            if self.previous_results_file is not None
501            else GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE)
502
503    previous_results = {}
504    if os.path.isfile(previous_results_file):
505      with open(previous_results_file, 'r') as f:
506        previous_results = dictsToBots(json.loads(f.read()))
507
508    return previous_results
509
510  def checkBots(self):
511    time_str = 'Current time: %s\n\n' % (formatTime(time.localtime()))
512    print time_str
513
514    try:
515      bots = Waterfall.getAllGpuBots()
516
517      offline_bots = Waterfall.getOfflineBots(bots)
518      offline_str = getOfflineBotsStr(offline_bots)
519      print offline_str
520
521      failed_bots = Waterfall.getFailedBots(bots)
522      failed_str = getFailedBotsStr(failed_bots)
523      print failed_str
524
525      previous_results = self.getPreviousResults()
526      noteworthy_events = getNoteworthyEvents(offline_bots, failed_bots,
527              previous_results)
528
529      noteworthy_str = getNoteworthyStr(noteworthy_events)
530      print noteworthy_str
531
532      summary = getSummary(offline_bots, failed_bots)
533      self.writeResults(summary)
534
535      if (self.emailer is not None and self.shouldEmail(noteworthy_events)):
536        self.emailer.send_email(Emailer.format_email_body(time_str, offline_str,
537            failed_str, noteworthy_str))
538    except Exception as e:
539      error_str = 'Error: %s' % str(e)
540      print error_str
541
542      if self.send_email_on_error:
543        self.emailer.send_email(error_str)
544
def parseArgs(sys_args):
  """Parses and validates the command line.

  Args:
    sys_args: Full argument vector; element 0 is used as the program name.

  Returns:
    The argparse namespace with the parsed options.

  Invalid combinations (an email option without both addresses, a missing
  password file, a negative repeat delay) are reported through parser.error(),
  which prints a usage message and exits.
  """
  parser = argparse.ArgumentParser(prog=sys_args[0],
          description='Query the Chromium GPU Bots Waterfall, output '
          'potential problems, and optionally repeat automatically and/or '
          'email notifications of results.')

  parser.add_argument('--repeat-delay', type=int, dest='repeat_delay',
          required=False,
          help='How often to automatically re-run the script, in minutes.')

  parser.add_argument('--email-from', type=str, dest='email_from',
          required=False,
          help='Email address to send from. Requires also specifying '
          '\'--email-to\'.')

  parser.add_argument('--email-to', type=str, dest='email_to', required=False,
          nargs='+',
          help='Email address(es) to send to. Requires also specifying '
          '\'--email-from\'')

  parser.add_argument('--send-email-for-recovered-offline-bots',
          dest='send_email_for_recovered_offline_bots', action='store_true',
          default=False,
          help='Send an email out when a bot which has been offline for more '
          'than 1 hour goes back online.')

  parser.add_argument('--send-email-for-recovered-failing-bots',
          dest='send_email_for_recovered_failing_bots',
          action='store_true', default=False,
          help='Send an email when a failing bot recovers.')

  parser.add_argument('--send-email-on-error',
          dest='send_email_on_error',
          action='store_true', default=False,
          help='Send an email when the script has an error. For example, if '
          'the server is unreachable.')

  parser.add_argument('--email-password-file',
          dest='email_password_file',
          required=False,
          help=(('File containing the plaintext password of the source email '
          'account. By default, \'%s\' will be tried. If it does not exist, '
          'you will be prompted. If you opt to store your password on disk '
          'in plaintext, use of a dummy account is strongly recommended.')
          % Emailer.DEFAULT_EMAIL_PASSWORD_FILE))

  parser.add_argument('--previous-results-file',
          dest='previous_results_file',
          required=False,
          help=(('File to store the results of the previous invocation of '
              'this script. By default, \'%s\' will be used.')
              % GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE))

  args = parser.parse_args(sys_args[1:])

  # parser.error() exits, so only the first problem found is reported.
  if args.email_from is not None and args.email_to is None:
    parser.error('--email-from requires --email-to.')
  elif args.email_to is not None and args.email_from is None:
    parser.error('--email-to requires --email-from.')
  elif args.email_from is None and args.send_email_for_recovered_offline_bots:
    parser.error('--send-email-for-recovered-offline-bots requires '
            '--email-to and --email-from.')
  elif (args.email_from is None and args.send_email_for_recovered_failing_bots):
    parser.error('--send-email-for-recovered-failing-bots '
            'requires --email-to and --email-from.')
  elif (args.email_from is None and args.send_email_on_error):
    parser.error('--send-email-on-error '
            'requires --email-to and --email-from.')
  elif (args.email_password_file and
          not os.path.isfile(args.email_password_file)):
    parser.error('File does not exist: %s' % args.email_password_file)
  elif args.repeat_delay is not None and args.repeat_delay < 0:
    # A negative delay would otherwise only fail later, when it is passed to
    # time.sleep() after the first check has already run.
    parser.error('--repeat-delay must not be negative.')

  return args
618
def main(sys_args):
  """Script entry point.

  Parses |sys_args|, verifies the email login when both addresses were given,
  then checks the bots once, or repeatedly when a repeat delay was given.

  Returns:
    1 if the email login fails, 0 otherwise.
  """
  args = parseArgs(sys_args)

  emailer = None
  email_configured = not (args.email_from is None or args.email_to is None)

  if email_configured:
    emailer = Emailer(args.email_from, args.email_to, args.email_password_file)

    # Fail fast on bad credentials instead of at the first notification.
    try:
      emailer.testEmailLogin()
    except Exception as e:
      print('Error logging into email account: %s' % str(e))
      return 1

  poller = GpuBotPoller(emailer,
          args.send_email_for_recovered_offline_bots,
          args.send_email_for_recovered_failing_bots,
          args.send_email_on_error,
          args.previous_results_file)

  poller.checkBots()

  # Without a repeat delay the single check above is all that runs.
  while args.repeat_delay is not None:
    print('Will run again in %d minutes...\n' % args.repeat_delay)
    time.sleep(args.repeat_delay * 60)
    poller.checkBots()

  return 0
648
# Run only when executed as a script; the value returned by main() is passed
# to sys.exit() as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
651