• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/python
2
3# Copyright 2017 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""
8Swarming bot manager running on servers that hold swarming bots.
9This manages running swarming bots and routinely recovers any that die.
10"""
11
12import argparse
13import logging
14import signal
15import socket
16import sys
17import time
18import urllib2
19
20import common
21from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
22from autotest_lib.site_utils.chromeos_proxy import swarming_bots
23
24from chromite.lib import metrics
25from chromite.lib import ts_mon_config
26
27
# Number of seconds to wait between two consecutive bot checks.
CHECK_INTERVAL = 3 * 60

# Module-level flag polled by the main loop; the loop stops once it is True.
_shut_down = False

# Template for metric names; '%s' is filled in with the specific metric.
metrics_template = 'chromeos/autotest/swarming/bot_manager/%s'
34
def _parse_args(args):
    """Build the command-line parser and parse the given arguments.

    @param args: A list of command-line arguments (e.g. sys.argv[1:]).

    @return: The argparse namespace with attributes afe, id_range,
             working_dir, swarming_proxy, log_file and verbose.
    """
    arg_parser = argparse.ArgumentParser(
            description='Manage the set of swarming bots running on a server')

    # Required positional arguments, in command-line order.
    # TODO(xixuan): refactor together with swarming_bots.
    positionals = (
            ('afe', 'AFE to get server role and status.'),
            ('id_range',
             'A range of integer, each bot created will be labeled '
             'with an id from this range. E.g. "1-200"'),
            ('working_dir',
             'A working directory where bots will store files '
             'generated at runtime'),
    )
    for arg_name, help_text in positionals:
        arg_parser.add_argument(arg_name, type=str, help=help_text)

    # Optional flags.
    arg_parser.add_argument(
            '-p', '--swarming_proxy', type=str, dest='swarming_proxy',
            default=swarming_bots.DEFAULT_SWARMING_PROXY,
            help='The URL of the swarming instance to talk to, '
                 'Default to the one specified in global config')
    arg_parser.add_argument(
            '-f', '--log_file', dest='log_file',
            help='Path to the log file.')
    arg_parser.add_argument(
            '-v', '--verbose', dest='verbose', action='store_true',
            help='Verbose mode')

    return arg_parser.parse_args(args)
63
64
def handle_signal(signum, frame):
    """Signal handler that requests a shutdown of the manager loop.

    Sets the module-level _shut_down flag, which the main loop polls.

    @param signum: The signal received. Ignored.
    @param frame: The current stack frame. Ignored.
    """
    # Without this declaration the assignment below would only create a
    # function-local variable and the module-level flag would never change.
    global _shut_down

    del signum
    del frame

    _shut_down = True
75
76
def is_server_in_prod(server_name, afe):
    """Validate server's role and status.

    Asks the AFE whether server_name is registered with status 'primary'
    and role 'golo_proxy', and records the outcome in the
    'server_in_prod_check' counter.

    @param server_name: the server name to be validated.
    @param afe: the afe server to get role & status info in server_db.

    @return: A boolean value, True when the server_name is in prod, False
             otherwise, or if the RPC fails for any reason.
    """
    logging.info('Validating server: %s', server_name)
    # Keep the wrapper under its own name so that log messages below report
    # the AFE hostname rather than the wrapper object.
    afe_client = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10,
                                               server=afe)
    is_prod_proxy_server = False
    try:
        is_prod_proxy_server = bool(
                afe_client.run('get_servers', hostname=server_name,
                               status='primary', role='golo_proxy'))
    except urllib2.URLError as e:
        logging.warning('RPC get_servers failed on afe %s: %s', afe, str(e))
    except Exception:
        # Best effort: any other failure is treated as "not in prod", but is
        # logged with its traceback instead of being dropped silently.
        logging.exception('Unexpected error in RPC get_servers on afe %s',
                          afe)
    finally:
        metrics.Counter(metrics_template % 'server_in_prod_check').increment(
                fields={'success': is_prod_proxy_server})

    # Returning here rather than inside 'finally' lets SystemExit and
    # KeyboardInterrupt propagate instead of being discarded.
    return is_prod_proxy_server
101
102
@metrics.SecondsTimerDecorator(metrics_template % 'tick')
def tick(afe, bot_manager):
    """Run a single iteration of the swarming bot manager.

    The bots are checked only when this host is reported as a prod server.

    @param afe: the afe to check server role.
    @param bot_manager: a swarming_bots.BotManager instance.
    """
    local_hostname = socket.getfqdn()
    if not is_server_in_prod(local_hostname, afe):
        return
    bot_manager.check()
112
113
def main(args):
    """Run the swarming bot manager until a shutdown is requested.

    Parses arguments, sets up logging and signal handlers, then calls tick()
    every CHECK_INTERVAL seconds until the _shut_down flag becomes True.

    @param args: A list of system arguments.

    @return: 1 if no swarming proxy is configured; otherwise None once the
             loop has exited.
    """
    args = _parse_args(args)
    swarming_bots.setup_logging(args.verbose, args.log_file)

    if not args.swarming_proxy:
        logging.error(
                'No swarming proxy instance specified. '
                'Specify swarming_proxy in [CROS] in shadow_config, '
                'or use --swarming_proxy')
        return 1

    # Make sure the proxy URL carries an explicit https:// scheme.
    if not args.swarming_proxy.startswith('https://'):
        swarming_proxy = 'https://' + args.swarming_proxy
    else:
        swarming_proxy = args.swarming_proxy

    logging.info("Setting signal handler.")
    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    # Pass the normalized URL, not the raw command-line value.
    bot_manager = swarming_bots.BotManager(
            swarming_bots.parse_range(args.id_range),
            args.working_dir,
            swarming_proxy)
    with ts_mon_config.SetupTsMonGlobalState('swarming_bots', indirect=True):
        # _shut_down is only read here, so no 'global' declaration is needed.
        while not _shut_down:
            tick(args.afe, bot_manager)
            if _shut_down:
                # Shutdown was requested during the tick; skip the wait.
                break
            time.sleep(CHECK_INTERVAL)
149
150
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
153