1#!/usr/bin/python 2 3from __future__ import print_function 4 5import argparse 6import logging 7import multiprocessing 8import os 9import subprocess 10import sys 11import time 12 13import common 14from autotest_lib.server import frontend 15from autotest_lib.site_utils.lib import infra 16 17DEPLOY_SERVER_LOCAL = ('/usr/local/autotest/site_utils/deploy_server_local.py') 18POOL_SIZE = 124 19PUSH_ORDER = {'database': 0, 20 'database_slave': 0, 21 'drone': 1, 22 'shard': 1, 23 'golo_proxy': 1, 24 'sentinel': 1, 25 'afe': 2, 26 'scheduler': 2, 27 'host_scheduler': 2, 28 'suite_scheduler': 2} 29 30 31def discover_servers(afe, server_filter=set()): 32 """Discover the in-production servers to update. 33 34 @param afe: Server to contact with RPC requests. 35 @param server_filter: A set of servers to get status for. 36 37 @returns: A list of a list of tuple of (server_name, server_status, roles). 38 The list is sorted by the order to be updated. Servers in the same 39 sublist can be pushed together. 40 41 """ 42 # Example server details.... 43 # { 44 # 'hostname': 'server1', 45 # 'status': 'backup', 46 # 'roles': ['drone', 'scheduler'], 47 # 'attributes': {'max_processes': 300} 48 # } 49 rpc = frontend.AFE(server=afe) 50 servers = rpc.run('get_servers') 51 52 # Do not update servers that need repair, and filter the server list by 53 # given server_filter if needed. 54 servers = [s for s in servers 55 if (s['status'] != 'repair_required' and 56 (not server_filter or s['hostname'] in server_filter))] 57 58 # Do not update reserve, devserver or crash_server (not YET supported). 59 servers = [s for s in servers if 'devserver' not in s['roles'] and 60 'crash_server' not in s['roles'] and 61 'reserve' not in s['roles']] 62 63 sorted_servers = [] 64 for i in range(max(PUSH_ORDER.values()) + 1): 65 sorted_servers.append([]) 66 servers_with_unknown_order = [] 67 for server in servers: 68 info = (server['hostname'], server['status'], server['roles']) 69 try: 70 order = min([PUSH_ORDER[r] for r in server['roles'] 71 if r in PUSH_ORDER]) 72 sorted_servers[order].append(info) 73 except ValueError: 74 # All roles are not indexed in PUSH_ORDER. 75 servers_with_unknown_order.append(info) 76 77 # Push all servers with unknown roles together. 78 if servers_with_unknown_order: 79 sorted_servers.append(servers_with_unknown_order) 80 81 found_servers = set([s['hostname'] for s in servers]) 82 # Inject the servers passed in by user but not found in server database. 83 extra_servers = [] 84 for server in server_filter - found_servers: 85 extra_servers.append((server, 'unknown', ['unknown'])) 86 if extra_servers: 87 sorted_servers.append(extra_servers) 88 89 return sorted_servers 90 91 92def parse_arguments(args): 93 """Parse command line arguments. 94 95 @param args: The command line arguments to parse. (usually sys.argv[1:]) 96 97 @returns An argparse.Namespace populated with argument values. 98 """ 99 parser = argparse.ArgumentParser( 100 formatter_class=argparse.RawDescriptionHelpFormatter, 101 description='Command to update an entire autotest installation.', 102 epilog=('Update all servers:\n' 103 ' deploy_server.py\n' 104 '\n' 105 'Update one server:\n' 106 ' deploy_server.py <server>\n' 107 '\n' 108 'Send arguments to remote deploy_server_local.py:\n' 109 ' deploy_server.py -- --dryrun\n' 110 '\n' 111 'See what arguments would be run on specified servers:\n' 112 ' deploy_server.py --dryrun <server_a> <server_b> --' 113 ' --skip-update\n')) 114 115 parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', 116 help='Log all deploy script output.') 117 parser.add_argument('--continue', action='store_true', dest='cont', 118 help='Continue to the next server on failure.') 119 parser.add_argument('--afe', required=True, 120 help='What is the main server for this installation? (cautotest).') 121 parser.add_argument('--update_push_servers', action='store_true', 122 help='Indicate to update test_push servers.') 123 parser.add_argument('--force_update', action='store_true', 124 help='Force to run update commands for afe, tko, build_externals') 125 parser.add_argument('--dryrun', action='store_true', 126 help='Don\'t actually run remote commands.') 127 parser.add_argument('--logfile', action='store', 128 default='/tmp/deployment.log', 129 help='Path to the file to save the deployment log to. Default is ' 130 '/tmp/deployment.log') 131 parser.add_argument('args', nargs=argparse.REMAINDER, 132 help=('<server>, <server> ... -- <remote_arg>, <remote_arg> ...')) 133 134 results = parser.parse_args(args) 135 136 # We take the args list and further split it down. Everything before -- 137 # is a server name, and everything after it is an argument to pass along 138 # to deploy_server_local.py. 139 # 140 # This: 141 # server_a, server_b -- --dryrun --skip-report 142 # 143 # Becomes: 144 # args.servers['server_a', 'server_b'] 145 # args.args['--dryrun', '--skip-report'] 146 try: 147 local_args_index = results.args.index('--') + 1 148 except ValueError: 149 # If -- isn't present, they are all servers. 150 results.servers = results.args 151 results.args = [] 152 else: 153 # Split arguments. 154 results.servers = results.args[:local_args_index-1] 155 results.args = results.args[local_args_index:] 156 157 return results 158 159 160def update_server(inputs): 161 """Deploy for given server. 162 163 @param inputs: Inputs for the update action, including: 164 server: Name of the server to update. 165 status: Status of the server. 166 options: Options for the update. 167 168 @return: A tuple of (server, success, output), where: 169 server: Name of the server to be updated. 170 sucess: True if update succeeds, False otherwise. 171 output: A string of the deploy_server_local script output 172 including any errors. 173 174 """ 175 start = time.time() 176 server = inputs['server'] 177 status = inputs['status'] 178 # Shared list to record the finished server. 179 finished_servers = inputs['finished_servers'] 180 options = inputs['options'] 181 print('Updating server %s...' % server) 182 if status == 'backup': 183 extra_args = ['--skip-service-status'] 184 else: 185 extra_args = [] 186 187 cmd = ('%s %s' % 188 (DEPLOY_SERVER_LOCAL, ' '.join(options.args + extra_args))) 189 output = '%s: %s' % (server, cmd) 190 success = True 191 if not options.dryrun: 192 for i in range(5): 193 try: 194 print('[%s/5] Try to update server %s' % (i, server)) 195 output = infra.execute_command(server, cmd) 196 finished_servers.append(server) 197 break 198 except subprocess.CalledProcessError as e: 199 print('%s: Command failed with error: %s' % (server, e)) 200 success = False 201 output = e.output 202 203 print('Time used to update server %s: %s' % (server, time.time()-start)) 204 return server, success, output 205 206 207def update_in_parallel(servers, options): 208 """Update a group of servers in parallel. 209 210 @param servers: A list of tuple of (server_name, server_status, roles). 211 @param options: Options for the push. 212 213 @returns A list of servers that failed to update. 214 """ 215 # Create a list to record all the finished servers. 216 manager = multiprocessing.Manager() 217 finished_servers = manager.list() 218 219 args = [] 220 for server, status, _ in servers: 221 args.append({'server': server, 222 'status': status, 223 'finished_servers': finished_servers, 224 'options': options}) 225 # The update actions run in parallel. If any update failed, we should wait 226 # for other running updates being finished. Abort in the middle of an update 227 # may leave the server in a bad state. 228 pool = multiprocessing.pool.ThreadPool(POOL_SIZE) 229 try: 230 failed_servers = [] 231 results = pool.map_async(update_server, args) 232 pool.close() 233 234 # Track the updating progress for current group of servers. 235 incomplete_servers = set() 236 server_names = set([s[0] for s in servers]) 237 while not results.ready(): 238 incomplete_servers = server_names - set(finished_servers) 239 print('Not finished yet. %d servers in this group. ' 240 '%d servers are still running:\n%s\n' % 241 (len(servers), len(incomplete_servers), incomplete_servers)) 242 # Check the progress every 1 mins 243 results.wait(60) 244 245 # After update finished, parse the result. 246 for server, success, output in results.get(): 247 if options.dryrun: 248 print('Dry run, updating server %s is skipped.' % server) 249 else: 250 if success: 251 msg = ('Successfully updated server %s.\n' % server) 252 if options.verbose: 253 print(output) 254 print() 255 else: 256 msg = ('Failed to update server %s.\nError: %s' % 257 (server, output.strip())) 258 print(msg) 259 failed_servers.append(server) 260 # Write the result into logfile. 261 with open(options.logfile, 'a') as f: 262 f.write(msg) 263 finally: 264 pool.terminate() 265 pool.join() 266 267 return failed_servers 268 269def main(args): 270 """Main routine that drives all the real work. 271 272 @param args: The command line arguments to parse. (usually sys.argv) 273 274 @returns The system exit code. 275 """ 276 options = parse_arguments(args[1:]) 277 # Remove all the handlers from the root logger to get rid of the handlers 278 # introduced by the import packages. 279 logging.getLogger().handlers = [] 280 logging.basicConfig(level=logging.DEBUG 281 if options.verbose else logging.INFO) 282 283 print('Retrieving server status...') 284 sorted_servers = discover_servers(options.afe, set(options.servers or [])) 285 286 # Display what we plan to update. 287 print('Will update (in this order):') 288 i = 1 289 for servers in sorted_servers: 290 print('%s Group %d (%d servers) %s' % ('='*30, i, len(servers), '='*30)) 291 for server, status, roles in servers: 292 print('\t%-36s:\t%s\t%s' % (server, status, roles)) 293 i += 1 294 print() 295 296 if os.path.exists(options.logfile): 297 os.remove(options.logfile) 298 print ('Start updating, push logs of every server will be saved ' 299 'at %s' % options.logfile) 300 failed = [] 301 skipped = [] 302 for servers in sorted_servers: 303 if not failed or options.cont: 304 failed += update_in_parallel(servers, options) 305 else: 306 skipped.extend(s[0] for s in servers) # Only include server name. 307 308 if failed: 309 print('Errors updating:') 310 for server in failed: 311 print(' %s' % server) 312 print() 313 print('To retry:') 314 print(' %s <options> %s' % 315 (str(args[0]), str(' '.join(failed + skipped)))) 316 # Exit with error. 317 return 1 318 319 320if __name__ == '__main__': 321 sys.exit(main(sys.argv)) 322