1#!/usr/bin/env python2 2# 3# Copyright 2015 Google INc. All Rights Reserved. 4"""This module controls locking and unlocking of test machines.""" 5 6from __future__ import print_function 7 8import argparse 9import getpass 10import os 11import sys 12import traceback 13 14from cros_utils import logger 15from cros_utils import machines 16 17 18class AFELockException(Exception): 19 """Base class for exceptions in this module.""" 20 21 22class MachineNotPingable(AFELockException): 23 """Raised when machine does not respond to ping.""" 24 25 26class MissingHostInfo(AFELockException): 27 """Raised when cannot find info about machine on machine servers.""" 28 29 30class UpdateNonLocalMachine(AFELockException): 31 """Raised when user requests to add/remove a ChromeOS HW Lab machine..""" 32 33 34class DuplicateAdd(AFELockException): 35 """Raised when user requests to add a machine that's already on the server.""" 36 37 38class UpdateServerError(AFELockException): 39 """Raised when attempt to add/remove a machine from local server fails.""" 40 41 42class LockingError(AFELockException): 43 """Raised when server fails to lock/unlock machine as requested.""" 44 45 46class DontOwnLock(AFELockException): 47 """Raised when user attmepts to unlock machine locked by someone else.""" 48 # This should not be raised if the user specified '--force' 49 50 51class NoAFEServer(AFELockException): 52 """Raised when cannot find/access the autotest server.""" 53 54 55class AFEAccessError(AFELockException): 56 """Raised when cannot get information about lab machine from lab server.""" 57 58 59class AFELockManager(object): 60 """Class for locking/unlocking machines vie Autotest Front End servers. 61 62 This class contains methods for checking the locked status of machines 63 on both the ChromeOS HW Lab AFE server and a local AFE server. It also 64 has methods for adding/removing machines from the local server, and for 65 changing the lock status of machines on either server. For the ChromeOS 66 HW Lab, it only allows access to the toolchain team lab machines, as 67 defined in toolchain-utils/crosperf/default_remotes. By default it will 68 look for a local server on chrotomation2.svl.corp.google.com, but an 69 alternative local AFE server can be supplied, if desired. 70 71 !!!IMPORTANT NOTE!!! The AFE server can only be called from the main 72 thread/process of a program. If you launch threads and try to call it 73 from a thread, you will get an error. This has to do with restrictions 74 in the Python virtual machine (and signal handling) and cannot be changed. 75 """ 76 77 LOCAL_SERVER = 'chrotomation2.svl.corp.google.com' 78 79 def __init__(self, 80 remotes, 81 force_option, 82 chromeos_root, 83 local_server, 84 use_local=True, 85 log=None): 86 """Initializes an AFELockManager object. 87 88 Args: 89 remotes: A list of machine names or ip addresses to be managed. Names 90 and ip addresses should be represented as strings. If the list is 91 empty, the lock manager will get all known machines. 92 force_option: A Boolean indicating whether or not to force an unlock of 93 a machine that was locked by someone else. 94 chromeos_root: The ChromeOS chroot to use for the autotest scripts. 95 local_server: A string containing the name or ip address of the machine 96 that is running an AFE server, which is to be used for managing 97 machines that are not in the ChromeOS HW lab. 98 local: A Boolean indicating whether or not to use/allow a local AFE 99 server to be used (see local_server argument). 100 use_local: Use the local server instead of the official one. 101 log: If not None, this is the logger object to be used for writing out 102 informational output messages. It is expected to be an instance of 103 Logger class from cros_utils/logger.py. 104 """ 105 self.chromeos_root = chromeos_root 106 self.user = getpass.getuser() 107 self.logger = log or logger.GetLogger() 108 autotest_path = os.path.join(chromeos_root, 109 'src/third_party/autotest/files') 110 111 sys.path.append(chromeos_root) 112 sys.path.append(autotest_path) 113 sys.path.append(os.path.join(autotest_path, 'server', 'cros')) 114 115 # We have to wait to do these imports until the paths above have 116 # been fixed. 117 # pylint: disable=import-error 118 from client import setup_modules 119 setup_modules.setup( 120 base_path=autotest_path, root_module_name='autotest_lib') 121 122 from dynamic_suite import frontend_wrappers 123 124 self.afe = frontend_wrappers.RetryingAFE( 125 timeout_min=30, delay_sec=10, debug=False, server='cautotest') 126 127 self.local = use_local 128 self.machines = list(set(remotes)) or [] 129 self.toolchain_lab_machines = self.GetAllToolchainLabMachines() 130 if self.machines and self.AllLabMachines(): 131 self.local = False 132 133 if not self.local: 134 self.local_afe = None 135 else: 136 dargs = {} 137 dargs['server'] = local_server or AFELockManager.LOCAL_SERVER 138 # Make sure local server is pingable. 139 error_msg = ('Local autotest server machine %s not responding to ping.' % 140 dargs['server']) 141 self.CheckMachine(dargs['server'], error_msg) 142 self.local_afe = frontend_wrappers.RetryingAFE( 143 timeout_min=30, delay_sec=10, debug=False, **dargs) 144 if not self.machines: 145 self.machines = self.toolchain_lab_machines + self.GetAllNonlabMachines() 146 self.force = force_option 147 148 def AllLabMachines(self): 149 """Check to see if all machines being used are HW Lab machines.""" 150 all_lab = True 151 for m in self.machines: 152 if m not in self.toolchain_lab_machines: 153 all_lab = False 154 break 155 return all_lab 156 157 def CheckMachine(self, machine, error_msg): 158 """Verifies that machine is responding to ping. 159 160 Args: 161 machine: String containing the name or ip address of machine to check. 162 error_msg: Message to print if ping fails. 163 164 Raises: 165 MachineNotPingable: If machine is not responding to 'ping' 166 """ 167 if not machines.MachineIsPingable(machine, logging_level='none'): 168 cros_machine = machine + '.cros' 169 if not machines.MachineIsPingable(cros_machine, logging_level='none'): 170 raise MachineNotPingable(error_msg) 171 172 def MachineIsKnown(self, machine): 173 """Checks to see if either AFE server knows the given machine. 174 175 Args: 176 machine: String containing name or ip address of machine to check. 177 178 Returns: 179 Boolean indicating if the machine is in the list of known machines for 180 either AFE server. 181 """ 182 if machine in self.toolchain_lab_machines: 183 return True 184 elif self.local_afe and machine in self.GetAllNonlabMachines(): 185 return True 186 187 return False 188 189 def GetAllToolchainLabMachines(self): 190 """Gets a list of all the toolchain machines in the ChromeOS HW lab. 191 192 Returns: 193 A list of names of the toolchain machines in the ChromeOS HW lab. 194 """ 195 machines_file = os.path.join( 196 os.path.dirname(__file__), 'crosperf', 'default_remotes') 197 machine_list = [] 198 with open(machines_file, 'r') as input_file: 199 lines = input_file.readlines() 200 for line in lines: 201 _, remotes = line.split(':') 202 remotes = remotes.strip() 203 for r in remotes.split(): 204 machine_list.append(r.strip()) 205 return machine_list 206 207 def GetAllNonlabMachines(self): 208 """Gets a list of all known machines on the local AFE server. 209 210 Returns: 211 A list of the names of the machines on the local AFE server. 212 """ 213 non_lab_machines = [] 214 if self.local_afe: 215 non_lab_machines = self.local_afe.get_hostnames() 216 return non_lab_machines 217 218 def PrintStatusHeader(self, is_lab_machine): 219 """Prints the status header lines for machines. 220 221 Args: 222 is_lab_machine: Boolean indicating whether to print HW Lab header or 223 local machine header (different spacing). 224 """ 225 if is_lab_machine: 226 print('\nMachine (Board)\t\t\t\t\tStatus') 227 print('---------------\t\t\t\t\t------\n') 228 else: 229 print('\nMachine (Board)\t\tStatus') 230 print('---------------\t\t------\n') 231 232 def RemoveLocalMachine(self, m): 233 """Removes a machine from the local AFE server. 234 235 Args: 236 m: The machine to remove. 237 238 Raises: 239 MissingHostInfo: Can't find machine to be removed. 240 """ 241 if self.local_afe: 242 host_info = self.local_afe.get_hosts(hostname=m) 243 if host_info: 244 host_info = host_info[0] 245 host_info.delete() 246 else: 247 raise MissingHostInfo('Cannot find/delete machine %s.' % m) 248 249 def AddLocalMachine(self, m): 250 """Adds a machine to the local AFE server. 251 252 Args: 253 m: The machine to be added. 254 """ 255 if self.local_afe: 256 error_msg = 'Machine %s is not responding to ping.' % m 257 self.CheckMachine(m, error_msg) 258 self.local_afe.create_host(m) 259 260 def AddMachinesToLocalServer(self): 261 """Adds one or more machines to the local AFE server. 262 263 Verify that the requested machines are legal to add to the local server, 264 i.e. that they are not ChromeOS HW lab machines, and they are not already 265 on the local server. Call AddLocalMachine for each valid machine. 266 267 Raises: 268 DuplicateAdd: Attempt to add a machine that is already on the server. 269 UpdateNonLocalMachine: Attempt to add a ChromeOS HW lab machine. 270 UpdateServerError: Something went wrong while attempting to add a 271 machine. 272 """ 273 for m in self.machines: 274 for cros_name in [m, m + '.cros']: 275 if cros_name in self.toolchain_lab_machines: 276 raise UpdateNonLocalMachine( 277 'Machine %s is already in the ChromeOS HW' 278 'Lab. Cannot add it to local server.' % cros_name) 279 host_info = self.local_afe.get_hosts(hostname=m) 280 if host_info: 281 raise DuplicateAdd('Machine %s is already on the local server.' % m) 282 try: 283 self.AddLocalMachine(m) 284 self.logger.LogOutput('Successfully added %s to local server.' % m) 285 except Exception as e: 286 traceback.print_exc() 287 raise UpdateServerError( 288 'Error occurred while attempting to add %s. %s' % (m, str(e))) 289 290 def RemoveMachinesFromLocalServer(self): 291 """Removes one or more machines from the local AFE server. 292 293 Verify that the requested machines are legal to remove from the local 294 server, i.e. that they are not ChromeOS HW lab machines. Call 295 RemoveLocalMachine for each valid machine. 296 297 Raises: 298 UpdateServerError: Something went wrong while attempting to remove a 299 machine. 300 """ 301 for m in self.machines: 302 for cros_name in [m, m + '.cros']: 303 if cros_name in self.toolchain_lab_machines: 304 raise UpdateNonLocalMachine( 305 'Machine %s is in the ChromeOS HW Lab. ' 306 'This script cannot remove lab machines.' % cros_name) 307 try: 308 self.RemoveLocalMachine(m) 309 self.logger.LogOutput('Successfully removed %s from local server.' % m) 310 except Exception as e: 311 traceback.print_exc() 312 raise UpdateServerError('Error occurred while attempting to remove %s ' 313 '(%s).' % (m, str(e))) 314 315 def ListMachineStates(self, machine_states): 316 """Gets and prints the current status for a list of machines. 317 318 Prints out the current status for all of the machines in the current 319 AFELockManager's list of machines (set when the object is initialized). 320 321 Args: 322 machine_states: A dictionary of the current state of every machine in 323 the current AFELockManager's list of machines. Normally obtained by 324 calling AFELockManager::GetMachineStates. 325 """ 326 local_machines = [] 327 printed_hdr = False 328 for m in machine_states: 329 cros_name = m + '.cros' 330 if (m in self.toolchain_lab_machines or 331 cros_name in self.toolchain_lab_machines): 332 name = m if m in self.toolchain_lab_machines else cros_name 333 if not printed_hdr: 334 self.PrintStatusHeader(True) 335 printed_hdr = True 336 state = machine_states[m] 337 if state['locked']: 338 print('%s (%s)\tlocked by %s since %s' % 339 (name, state['board'], state['locked_by'], state['lock_time'])) 340 else: 341 print('%s (%s)\tunlocked' % (name, state['board'])) 342 else: 343 local_machines.append(m) 344 345 if local_machines: 346 self.PrintStatusHeader(False) 347 for m in local_machines: 348 state = machine_states[m] 349 if state['locked']: 350 print('%s (%s)\tlocked by %s since %s' % 351 (m, state['board'], state['locked_by'], state['lock_time'])) 352 else: 353 print('%s (%s)\tunlocked' % (m, state['board'])) 354 355 def UpdateLockInAFE(self, should_lock_machine, machine): 356 """Calls an AFE server to lock/unlock a machine. 357 358 Args: 359 should_lock_machine: Boolean indicating whether to lock the machine (True) 360 or unlock the machine (False). 361 machine: The machine to update. 362 363 Raises: 364 LockingError: An error occurred while attempting to update the machine 365 state. 366 """ 367 action = 'lock' 368 if not should_lock_machine: 369 action = 'unlock' 370 kwargs = {'locked': should_lock_machine} 371 kwargs['lock_reason'] = 'toolchain user request (%s)' % self.user 372 373 cros_name = machine + '.cros' 374 if cros_name in self.toolchain_lab_machines: 375 machine = cros_name 376 if machine in self.toolchain_lab_machines: 377 m = machine.split('.')[0] 378 afe_server = self.afe 379 else: 380 m = machine 381 afe_server = self.local_afe 382 383 try: 384 afe_server.run( 385 'modify_hosts', 386 host_filter_data={'hostname__in': [m]}, 387 update_data=kwargs) 388 except Exception as e: 389 traceback.print_exc() 390 raise LockingError('Unable to %s machine %s. %s' % (action, m, str(e))) 391 392 def UpdateMachines(self, lock_machines): 393 """Sets the locked state of the machines to the requested value. 394 395 The machines updated are the ones in self.machines (specified when the 396 class object was intialized). 397 398 Args: 399 lock_machines: Boolean indicating whether to lock the machines (True) or 400 unlock the machines (False). 401 402 Returns: 403 A list of the machines whose state was successfully updated. 404 """ 405 updated_machines = [] 406 for m in self.machines: 407 self.UpdateLockInAFE(lock_machines, m) 408 # Since we returned from self.UpdateLockInAFE we assume the request 409 # succeeded. 410 if lock_machines: 411 self.logger.LogOutput('Locked machine(s) %s.' % m) 412 else: 413 self.logger.LogOutput('Unlocked machine(s) %s.' % m) 414 updated_machines.append(m) 415 416 return updated_machines 417 418 def _InternalRemoveMachine(self, machine): 419 """Remove machine from internal list of machines. 420 421 Args: 422 machine: Name of machine to be removed from internal list. 423 """ 424 # Check to see if machine is lab machine and if so, make sure it has 425 # ".cros" on the end. 426 cros_machine = machine 427 if machine.find('rack') > 0 and machine.find('row') > 0: 428 if machine.find('.cros') == -1: 429 cros_machine = cros_machine + '.cros' 430 431 self.machines = [ 432 m for m in self.machines if m != cros_machine and m != machine 433 ] 434 435 def CheckMachineLocks(self, machine_states, cmd): 436 """Check that every machine in requested list is in the proper state. 437 438 If the cmd is 'unlock' verify that every machine is locked by requestor. 439 If the cmd is 'lock' verify that every machine is currently unlocked. 440 441 Args: 442 machine_states: A dictionary of the current state of every machine in 443 the current AFELockManager's list of machines. Normally obtained by 444 calling AFELockManager::GetMachineStates. 445 cmd: The user-requested action for the machines: 'lock' or 'unlock'. 446 447 Raises: 448 DontOwnLock: The lock on a requested machine is owned by someone else. 449 """ 450 for k, state in machine_states.iteritems(): 451 if cmd == 'unlock': 452 if not state['locked']: 453 self.logger.LogWarning('Attempt to unlock already unlocked machine ' 454 '(%s).' % k) 455 self._InternalRemoveMachine(k) 456 457 if state['locked'] and state['locked_by'] != self.user: 458 raise DontOwnLock('Attempt to unlock machine (%s) locked by someone ' 459 'else (%s).' % (k, state['locked_by'])) 460 elif cmd == 'lock': 461 if state['locked']: 462 self.logger.LogWarning( 463 'Attempt to lock already locked machine (%s)' % k) 464 self._InternalRemoveMachine(k) 465 466 def HasAFEServer(self, local): 467 """Verifies that the AFELockManager has appropriate AFE server. 468 469 Args: 470 local: Boolean indicating whether we are checking for the local server 471 (True) or for the global server (False). 472 473 Returns: 474 A boolean indicating if the AFELockManager has the requested AFE server. 475 """ 476 if local: 477 return self.local_afe is not None 478 else: 479 return self.afe is not None 480 481 def GetMachineStates(self, cmd=''): 482 """Gets the current state of all the requested machines. 483 484 Gets the current state of all the requested machines, both from the HW lab 485 sever and from the local server. Stores the data in a dictionary keyed 486 by machine name. 487 488 Args: 489 cmd: The command for which we are getting the machine states. This is 490 important because if one of the requested machines is missing we raise 491 an exception, unless the requested command is 'add'. 492 493 Returns: 494 A dictionary of machine states for all the machines in the AFELockManager 495 object. 496 497 Raises: 498 NoAFEServer: Cannot find the HW Lab or local AFE server. 499 AFEAccessError: An error occurred when querying the server about a 500 machine. 501 """ 502 if not self.HasAFEServer(False): 503 raise NoAFEServer('Error: Cannot connect to main AFE server.') 504 505 if self.local and not self.HasAFEServer(True): 506 raise NoAFEServer('Error: Cannot connect to local AFE server.') 507 508 machine_list = {} 509 for m in self.machines: 510 host_info = None 511 cros_name = m + '.cros' 512 if (m in self.toolchain_lab_machines or 513 cros_name in self.toolchain_lab_machines): 514 mod_host = m.split('.')[0] 515 host_info = self.afe.get_hosts(hostname=mod_host) 516 if not host_info: 517 raise AFEAccessError('Unable to get information about %s from main' 518 ' autotest server.' % m) 519 else: 520 host_info = self.local_afe.get_hosts(hostname=m) 521 if not host_info and cmd != 'add': 522 raise AFEAccessError('Unable to get information about %s from ' 523 'local autotest server.' % m) 524 if host_info: 525 host_info = host_info[0] 526 name = host_info.hostname 527 values = {} 528 values['board'] = host_info.platform if host_info.platform else '??' 529 values['locked'] = host_info.locked 530 if host_info.locked: 531 values['locked_by'] = host_info.locked_by 532 values['lock_time'] = host_info.lock_time 533 else: 534 values['locked_by'] = '' 535 values['lock_time'] = '' 536 machine_list[name] = values 537 else: 538 machine_list[m] = {} 539 return machine_list 540 541 542def Main(argv): 543 """Parse the options, initialize lock manager and dispatch proper method. 544 545 Args: 546 argv: The options with which this script was invoked. 547 548 Returns: 549 0 unless an exception is raised. 550 """ 551 parser = argparse.ArgumentParser() 552 553 parser.add_argument( 554 '--list', 555 dest='cmd', 556 action='store_const', 557 const='status', 558 help='List current status of all known machines.') 559 parser.add_argument( 560 '--lock', 561 dest='cmd', 562 action='store_const', 563 const='lock', 564 help='Lock given machine(s).') 565 parser.add_argument( 566 '--unlock', 567 dest='cmd', 568 action='store_const', 569 const='unlock', 570 help='Unlock given machine(s).') 571 parser.add_argument( 572 '--status', 573 dest='cmd', 574 action='store_const', 575 const='status', 576 help='List current status of given machine(s).') 577 parser.add_argument( 578 '--add_machine', 579 dest='cmd', 580 action='store_const', 581 const='add', 582 help='Add machine to local machine server.') 583 parser.add_argument( 584 '--remove_machine', 585 dest='cmd', 586 action='store_const', 587 const='remove', 588 help='Remove machine from the local machine server.') 589 parser.add_argument( 590 '--nolocal', 591 dest='local', 592 action='store_false', 593 default=True, 594 help='Do not try to use local machine server.') 595 parser.add_argument( 596 '--remote', dest='remote', help='machines on which to operate') 597 parser.add_argument( 598 '--chromeos_root', 599 dest='chromeos_root', 600 required=True, 601 help='ChromeOS root to use for autotest scripts.') 602 parser.add_argument( 603 '--local_server', 604 dest='local_server', 605 default=None, 606 help='Alternate local autotest server to use.') 607 parser.add_argument( 608 '--force', 609 dest='force', 610 action='store_true', 611 default=False, 612 help='Force lock/unlock of machines, even if not' 613 ' current lock owner.') 614 615 options = parser.parse_args(argv) 616 617 if not options.remote and options.cmd != 'status': 618 parser.error('No machines specified for operation.') 619 620 if not os.path.isdir(options.chromeos_root): 621 parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root) 622 623 if not options.cmd: 624 parser.error('No operation selected (--list, --status, --lock, --unlock,' 625 ' --add_machine, --remove_machine).') 626 627 machine_list = [] 628 if options.remote: 629 machine_list = options.remote.split() 630 631 lock_manager = AFELockManager(machine_list, options.force, 632 options.chromeos_root, options.local_server, 633 options.local) 634 635 machine_states = lock_manager.GetMachineStates(cmd=options.cmd) 636 cmd = options.cmd 637 638 if cmd == 'status': 639 lock_manager.ListMachineStates(machine_states) 640 641 elif cmd == 'lock': 642 if not lock_manager.force: 643 lock_manager.CheckMachineLocks(machine_states, cmd) 644 lock_manager.UpdateMachines(True) 645 646 elif cmd == 'unlock': 647 if not lock_manager.force: 648 lock_manager.CheckMachineLocks(machine_states, cmd) 649 lock_manager.UpdateMachines(False) 650 651 elif cmd == 'add': 652 lock_manager.AddMachinesToLocalServer() 653 654 elif cmd == 'remove': 655 lock_manager.RemoveMachinesFromLocalServer() 656 657 return 0 658 659 660if __name__ == '__main__': 661 sys.exit(Main(sys.argv[1:])) 662