#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This module controls locking and unlocking of test machines."""

from __future__ import print_function

import argparse
import enum
import getpass
import os
import sys

import file_lock_machine

from cros_utils import command_executer
from cros_utils import logger
from cros_utils import machines


class LockException(Exception):
  """Base class for exceptions in this module."""


class MachineNotPingable(LockException):
  """Raised when machine does not respond to ping."""


class LockingError(LockException):
  """Raised when server fails to lock/unlock machine as requested."""


class DontOwnLock(LockException):
  """Raised when user attempts to unlock machine locked by someone else."""
  # This should not be raised if the user specified '--force'


class MachineType(enum.Enum):
  """Enum class to hold machine type."""
  LOCAL = 'local'
  SKYLAB = 'skylab'


def _StripCrosSuffix(machine):
  """Returns the machine name with one trailing '.cros' removed, if present.

  str.rstrip('.cros') must not be used for this: it strips any trailing run of
  the characters '.', 'c', 'r', 'o' and 's', so a name such as 'chromeos' would
  be turned into 'chrome'.

  Args:
    machine: String containing the name or ip address of a machine.

  Returns:
    The name without the '.cros' suffix; unchanged if it has no such suffix.
  """
  suffix = '.cros'
  if machine.endswith(suffix):
    return machine[:-len(suffix)]
  return machine


class LockManager(object):
  """Class for locking/unlocking machines via different modes.

  This class contains methods for checking the locked status of machines,
  and for changing the locked status. It handles HW lab machines and local
  machines, using appropriate locking mechanisms for each.
  """

  SKYLAB_PATH = 'skylab'

  # TODO(zhizhouy): lease time may need to be dynamically adjusted. For now we
  # set it long enough to cover the period to finish nightly rotation tests.
  LEASE_MINS = 1439

  SKYLAB_CREDENTIAL = ('/usr/local/google/home/mobiletc-prebuild'
                       '/sheriff_utils/credentials/skylab'
                       '/chromeos-swarming-credential.json')
  SWARMING = 'chromite/third_party/swarming.client/swarming.py'
  SUCCESS = 0

  def __init__(self,
               remotes,
               force_option,
               chromeos_root,
               locks_dir='',
               log=None):
    """Initializes a LockManager object.

    Args:
      remotes: A list of machine names or ip addresses to be managed.  Names
        and ip addresses should be represented as strings.  If the list is
        empty, the lock manager will get all known machines.
      force_option: A Boolean indicating whether or not to force an unlock of
        a machine that was locked by someone else.
      chromeos_root: The ChromeOS chroot to use for the autotest scripts. It
        is also appended to sys.path.
      locks_dir: A directory used for file locking local devices.
      log: If not None, this is the logger object to be used for writing out
        informational output messages.  It is expected to be an instance of
        Logger class from cros_utils/logger.py.
    """
    self.chromeos_root = chromeos_root
    self.user = getpass.getuser()
    self.logger = log or logger.GetLogger()
    self.ce = command_executer.GetCommandExecuter(self.logger)

    sys.path.append(chromeos_root)

    self.locks_dir = locks_dir

    # De-duplicate the requested machines (order is not preserved).
    self.machines = list(set(remotes))
    self.toolchain_lab_machines = self.GetAllToolchainLabMachines()

    # No machines requested: fall back to every known toolchain lab machine.
    if not self.machines:
      self.machines = self.toolchain_lab_machines
    self.force = force_option

    # Populated through AddMachineToLocal / AddMachineToSkylab.
    self.local_machines = []
    self.skylab_machines = []

  def CheckMachine(self, machine, error_msg):
    """Verifies that machine is responding to ping.

    The plain name is tried first, then the name with '.cros' appended.

    Args:
      machine: String containing the name or ip address of machine to check.
      error_msg: Message to print if ping fails.

    Raises:
      MachineNotPingable:  If machine is not responding to 'ping'
    """
    if machines.MachineIsPingable(machine, logging_level='none'):
      return
    cros_machine = machine + '.cros'
    if not machines.MachineIsPingable(cros_machine, logging_level='none'):
      raise MachineNotPingable(error_msg)

  def GetAllToolchainLabMachines(self):
    """Gets a list of all the toolchain machines in the ChromeOS HW lab.

    The machines are read from the 'crosperf/default_remotes' file next to
    this script. Each non-blank line is expected to look like
    '<label>: <remote> <remote> ...'; blank lines are ignored.

    Returns:
      A list of names of the toolchain machines in the ChromeOS HW lab.

    Raises:
      ValueError: If a non-blank line does not contain a ':' separator.
    """
    machines_file = os.path.join(
        os.path.dirname(__file__), 'crosperf', 'default_remotes')
    machine_list = []
    with open(machines_file, 'r') as input_file:
      for line in input_file:
        if not line.strip():
          # Tolerate blank (e.g. trailing) lines instead of crashing on them.
          continue
        # Only split on the first ':' so that remotes containing a colon do
        # not break the unpacking.
        _, remotes = line.split(':', 1)
        machine_list.extend(remotes.split())
    return machine_list

  def GetMachineType(self, m):
    """Get where the machine is located.

    Args:
      m: String containing the name or ip address of machine.

    Returns:
      Value of the type in MachineType Enum, or None if the machine has not
      been registered as either a local or a skylab machine.
    """
    if m in self.local_machines:
      return MachineType.LOCAL
    if m in self.skylab_machines:
      return MachineType.SKYLAB
    return None

  def PrintStatusHeader(self):
    """Prints the status header lines for machines."""
    print('\nMachine (Board)\t\t\t\t\tStatus')
    print('---------------\t\t\t\t\t------')

  def PrintStatus(self, m, state, machine_type):
    """Prints status for a single machine.

    Args:
      m: String containing the name or ip address of machine.
      state: A dictionary of the current state of the machine.
      machine_type: MachineType to determine where the machine is located.
    """
    # Local machines get two extra tabs before the status column.
    padding = '\t\t' if machine_type == MachineType.LOCAL else ''
    if state['locked']:
      print('%s (%s)\t\t%slocked by %s since %s' %
            (m, state['board'], padding, state['locked_by'],
             state['lock_time']))
    else:
      print('%s (%s)\t\t%sunlocked' % (m, state['board'], padding))

  def AddMachineToLocal(self, machine):
    """Adds a machine to local machine list.

    Args:
      machine: The machine to be added.
    """
    if machine not in self.local_machines:
      self.local_machines.append(machine)

  def AddMachineToSkylab(self, machine):
    """Adds a machine to skylab machine list.

    Args:
      machine: The machine to be added.
    """
    if machine not in self.skylab_machines:
      self.skylab_machines.append(machine)

  def ListMachineStates(self, machine_states):
    """Gets and prints the current status for a list of machines.

    Prints out the current status for all of the machines in the current
    LockManager's list of machines (set when the object is initialized).

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current LockManager's list of machines.  Normally obtained by
        calling LockManager::GetMachineStates.
    """
    self.PrintStatusHeader()
    for m, state in machine_states.items():
      self.PrintStatus(m, state, self.GetMachineType(m))

  def UpdateLockInSkylab(self, should_lock_machine, machine):
    """Ask skylab to lease/release a machine.

    Args:
      should_lock_machine: Boolean indicating whether to lock the machine (True)
        or unlock the machine (False).
      machine: The machine to update.

    Returns:
      True if requested action succeeded, else False.
    """
    try:
      if should_lock_machine:
        return self.LeaseSkylabMachine(machine)
      return self.ReleaseSkylabMachine(machine)
    except Exception as e:  # pylint: disable=broad-except
      # Best effort: any failure counts as "not updated", but record the cause
      # instead of silently dropping it.
      self.logger.LogWarning(
          'Updating skylab lock for machine (%s) raised: %r' % (machine, e))
      return False

  def UpdateFileLock(self, should_lock_machine, machine):
    """Use file lock for local machines.

    Args:
      should_lock_machine: Boolean indicating whether to lock the machine (True)
        or unlock the machine (False).
      machine: The machine to update.

    Returns:
      True if requested action succeeded, else False.
    """
    try:
      file_lock = file_lock_machine.Machine(machine, self.locks_dir)
      if should_lock_machine:
        return file_lock.Lock(True, sys.argv[0])
      return file_lock.Unlock(True)
    except Exception as e:  # pylint: disable=broad-except
      # Best effort: any failure counts as "not updated", but record the cause
      # instead of silently dropping it.
      self.logger.LogWarning(
          'Updating file lock for machine (%s) raised: %r' % (machine, e))
      return False

  def UpdateMachines(self, lock_machines):
    """Sets the locked state of the machines to the requested value.

    The machines updated are the ones in self.machines (specified when the
    class object was initialized). Machines that have not been registered as
    local or skylab machines cannot be updated and are reported as failed.

    Args:
      lock_machines: Boolean indicating whether to lock the machines (True) or
        unlock the machines (False).

    Returns:
      A list of the machines whose state was successfully updated.
    """
    updated_machines = []
    action = 'Locking' if lock_machines else 'Unlocking'
    for m in self.machines:
      # TODO(zhizhouy): Handling exceptions with more details when locking
      # doesn't succeed.
      machine_type = self.GetMachineType(m)
      if machine_type == MachineType.SKYLAB:
        ret = self.UpdateLockInSkylab(lock_machines, m)
      elif machine_type == MachineType.LOCAL:
        ret = self.UpdateFileLock(lock_machines, m)
      else:
        # Neither local nor skylab: there is no locking mechanism to use, and
        # machine_type is None so it has no '.value' to report.
        self.logger.LogOutput(
            '%s machine failed (unknown machine type): %s.' % (action, m))
        continue

      if ret:
        self.logger.LogOutput(
            '%s %s machine succeeded: %s.' % (action, machine_type.value, m))
        updated_machines.append(m)
      else:
        self.logger.LogOutput(
            '%s %s machine failed: %s.' % (action, machine_type.value, m))

    self.machines = updated_machines
    return updated_machines

  def _InternalRemoveMachine(self, machine):
    """Remove machine from internal list of machines.

    Args:
      machine: Name of machine to be removed from internal list.
    """
    # Check to see if machine is lab machine and if so, make sure it has
    # ".cros" on the end.
    cros_machine = machine
    if machine.find('rack') > 0 and machine.find('row') > 0:
      if machine.find('.cros') == -1:
        cros_machine = cros_machine + '.cros'

    self.machines = [
        m for m in self.machines if m not in (cros_machine, machine)
    ]

  def CheckMachineLocks(self, machine_states, cmd):
    """Check that every machine in requested list is in the proper state.

    If the cmd is 'unlock' verify that every machine is locked by requestor.
    If the cmd is 'lock' verify that every machine is currently unlocked.
    Machines that are already in the requested state are dropped from the
    internal machine list with a warning.

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current LockManager's list of machines.  Normally obtained by
        calling LockManager::GetMachineStates.
      cmd: The user-requested action for the machines: 'lock' or 'unlock'.

    Raises:
      DontOwnLock: The lock on a requested machine is owned by someone else.
    """
    for k, state in machine_states.items():
      if cmd == 'unlock':
        if not state['locked']:
          self.logger.LogWarning('Attempt to unlock already unlocked machine '
                                 '(%s).' % k)
          self._InternalRemoveMachine(k)

        # TODO(zhizhouy): Skylab doesn't support host info such as locked_by.
        # Need to update this when skylab supports it.
        if (state['locked'] and state['locked_by'] and
            state['locked_by'] != self.user):
          raise DontOwnLock('Attempt to unlock machine (%s) locked by someone '
                            'else (%s).' % (k, state['locked_by']))
      elif cmd == 'lock':
        if state['locked']:
          self.logger.LogWarning(
              'Attempt to lock already locked machine (%s)' % k)
          self._InternalRemoveMachine(k)

  def GetMachineStates(self, cmd=''):
    """Gets the current state of all the requested machines.

    Builds a dictionary keyed by machine name and prints it with
    ListMachineStates. The state is not queried from any server: every machine
    is assumed to be unlocked when cmd is 'lock' and locked otherwise.

    Args:
      cmd: The command for which we are getting the machine states. Only used
        to pick the assumed 'locked' value.

    Returns:
      A dictionary of machine states for all the machines in the LockManager
      object.
    """
    machine_list = {}
    for m in self.machines:
      # For local or skylab machines, we simply set {'locked': status} for them
      # TODO(zhizhouy): This is a quick fix since skylab cannot return host info
      # as afe does. We need to get more info such as locked_by when skylab
      # supports that.
      machine_list[m] = {
          'locked': 0 if cmd == 'lock' else 1,
          'board': '??',
          'locked_by': '',
          'lock_time': ''
      }

    self.ListMachineStates(machine_list)

    return machine_list

  def CheckMachineInSkylab(self, machine):
    """Run command to check if machine is in Skylab or not.

    Args:
      machine: The machine to look up; a trailing '.cros' is dropped.

    Returns:
      True if machine in skylab, else False

    Raises:
      ValueError: If the swarming query command exits with a non-zero code.
    """
    credential = ''
    if os.path.exists(self.SKYLAB_CREDENTIAL):
      credential = '--auth-service-account-json %s' % self.SKYLAB_CREDENTIAL
    swarming = os.path.join(self.chromeos_root, self.SWARMING)
    # TODO(zhizhouy): Swarming script doesn't support python3 so explicitly
    # launch it with python2 until migrated.
    cmd = ('python2 %s '
           'query --swarming https://chromeos-swarming.appspot.com '
           "%s 'bots/list?is_dead=FALSE&dimensions=dut_name:%s'" %
           (swarming, credential, _StripCrosSuffix(machine)))
    exit_code, stdout, stderr = self.ce.RunCommandWOutput(cmd)
    if exit_code:
      raise ValueError(
          'Querying bots failed (2); stdout: %r; stderr: %r' % (stdout, stderr))

    # The command will return a json output as stdout. If machine not in skylab
    # stdout will look like this:
    #  {
    #    "death_timeout": "600",
    #    "now": "TIMESTAMP"
    #  }
    # Otherwise there will be a tuple starting with 'items', we simply detect
    # this keyword for result.
    return 'items' in stdout

  def LeaseSkylabMachine(self, machine):
    """Run command to lease dut from skylab.

    Args:
      machine: The machine to lease; a trailing '.cros' is dropped.

    Returns:
      True if succeeded, False if failed.
    """
    credential = ''
    if os.path.exists(self.SKYLAB_CREDENTIAL):
      credential = '-service-account-json %s' % self.SKYLAB_CREDENTIAL
    cmd = '%s lease-dut -minutes %s %s %s' % (
        self.SKYLAB_PATH, self.LEASE_MINS, credential,
        _StripCrosSuffix(machine))
    # Wait 120 seconds for server to start the lease task, if not started,
    # we will treat it as unavailable.
    check_interval_time = 120
    retval = self.ce.RunCommand(cmd, command_timeout=check_interval_time)
    return retval == self.SUCCESS

  def ReleaseSkylabMachine(self, machine):
    """Run command to release dut from skylab.

    Args:
      machine: The machine to release; a trailing '.cros' is dropped.

    Returns:
      True if succeeded, False if failed.
    """
    credential = ''
    if os.path.exists(self.SKYLAB_CREDENTIAL):
      credential = '-service-account-json %s' % self.SKYLAB_CREDENTIAL
    cmd = '%s release-dut %s %s' % (self.SKYLAB_PATH, credential,
                                    _StripCrosSuffix(machine))
    retval = self.ce.RunCommand(cmd)
    return retval == self.SUCCESS


def Main(argv):
  """Parse the options, initialize lock manager and dispatch proper method.

  Args:
    argv: The options with which this script was invoked.

  Returns:
    0 unless an exception is raised. Invalid options make argparse exit the
    process through parser.error().
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '--list',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of all known machines.')
  parser.add_argument(
      '--lock',
      dest='cmd',
      action='store_const',
      const='lock',
      help='Lock given machine(s).')
  parser.add_argument(
      '--unlock',
      dest='cmd',
      action='store_const',
      const='unlock',
      help='Unlock given machine(s).')
  parser.add_argument(
      '--status',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of given machine(s).')
  parser.add_argument(
      '--remote', dest='remote', help='machines on which to operate')
  parser.add_argument(
      '--chromeos_root',
      dest='chromeos_root',
      required=True,
      help='ChromeOS root to use for autotest scripts.')
  parser.add_argument(
      '--force',
      dest='force',
      action='store_true',
      default=False,
      help='Force lock/unlock of machines, even if not'
      ' current lock owner.')

  options = parser.parse_args(argv)

  # Checked first so that a missing operation is not misreported as a missing
  # machine list. Only the operations that actually exist are advertised.
  if not options.cmd:
    parser.error('No operation selected (--list, --status, --lock, --unlock).')

  if not options.remote and options.cmd != 'status':
    parser.error('No machines specified for operation.')

  if not os.path.isdir(options.chromeos_root):
    parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

  machine_list = []
  if options.remote:
    machine_list = options.remote.split()

  lock_manager = LockManager(machine_list, options.force, options.chromeos_root)

  machine_states = lock_manager.GetMachineStates(cmd=options.cmd)
  cmd = options.cmd

  if cmd == 'status':
    # NOTE(review): GetMachineStates already printed this table once, so the
    # status is shown twice - confirm whether that is intended.
    lock_manager.ListMachineStates(machine_states)

  elif cmd == 'lock':
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(True)

  elif cmd == 'unlock':
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(False)

  return 0


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))