• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright 2019 The Chromium OS Authors. All rights reserved.
5# Use of this source code is governed by a BSD-style license that can be
6# found in the LICENSE file.
7
8"""This module controls locking and unlocking of test machines."""
9
10from __future__ import print_function
11
12import argparse
13import enum
14import getpass
15import os
16import sys
17
18import file_lock_machine
19
20from cros_utils import command_executer
21from cros_utils import logger
22from cros_utils import machines
23
24
class LockException(Exception):
  """Common ancestor of every error type defined in this module.

  Catching this type handles any of the more specific lock-related errors
  declared below it.
  """
27
28
class MachineNotPingable(LockException):
  """Signals that a machine did not answer a ping request."""
31
32
class LockingError(LockException):
  """Signals that the server could not lock/unlock a machine as requested."""
35
36
class DontOwnLock(LockException):
  """Signals an attempt to unlock a machine that someone else has locked."""
  # Not expected to be raised when the user passed '--force'.
40
41
class MachineType(enum.Enum):
  """Kinds of machine the lock manager distinguishes between.

  The member values are the lowercase labels used in log messages.
  """
  # Machine locked through a lock file.
  LOCAL = 'local'
  # Machine leased/released through crosfleet.
  CROSFLEET = 'crosfleet'
46
47
class LockManager(object):
  """Class for locking/unlocking machines via different locking mechanisms.

  This class contains methods for checking the locked status of machines,
  and for changing the locked status.  It handles HW lab machines and local
  machines, using appropriate locking mechanisms for each: crosfleet
  lease/abandon commands for crosfleet machines and lock files for local
  machines.
  """

  # Executable name used to build the `dut lease` / `dut abandon` commands.
  CROSFLEET_PATH = 'crosfleet'

  # TODO(zhizhouy): the lease time may need to be dynamically adjusted. For now
  # we set it long enough to cover the period to finish nightly rotation tests.
  # The value is passed to `dut lease -minutes`; 1439 is one minute short of
  # 24 hours.
  LEASE_MINS = 1439

  # Service-account JSON file; it is passed on the command line only when the
  # file exists on the machine running this script.
  CROSFLEET_CREDENTIAL = ('/usr/local/google/home/mobiletc-prebuild'
                          '/sheriff_utils/credentials/skylab'
                          '/chromeos-swarming-credential.json')
  # Location of the swarming client script, relative to chromeos_root.
  SWARMING = 'chromite/third_party/swarming.client/swarming.py'
  # Command exit status that is treated as success.
  SUCCESS = 0
67
68  def __init__(self,
69               remotes,
70               force_option,
71               chromeos_root,
72               locks_dir='',
73               log=None):
74    """Initializes an LockManager object.
75
76    Args:
77      remotes: A list of machine names or ip addresses to be managed.  Names
78        and ip addresses should be represented as strings.  If the list is
79        empty, the lock manager will get all known machines.
80      force_option: A Boolean indicating whether or not to force an unlock of
81        a machine that was locked by someone else.
82      chromeos_root: The ChromeOS chroot to use for the autotest scripts.
83      locks_dir: A directory used for file locking local devices.
84      log: If not None, this is the logger object to be used for writing out
85        informational output messages.  It is expected to be an instance of
86        Logger class from cros_utils/logger.py.
87    """
88    self.chromeos_root = chromeos_root
89    self.user = getpass.getuser()
90    self.logger = log or logger.GetLogger()
91    self.ce = command_executer.GetCommandExecuter(self.logger)
92
93    sys.path.append(chromeos_root)
94
95    self.locks_dir = locks_dir
96
97    self.machines = list(set(remotes)) or []
98    self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
99
100    if not self.machines:
101      self.machines = self.toolchain_lab_machines
102    self.force = force_option
103
104    self.local_machines = []
105    self.crosfleet_machines = []
106
107  def CheckMachine(self, machine, error_msg):
108    """Verifies that machine is responding to ping.
109
110    Args:
111      machine: String containing the name or ip address of machine to check.
112      error_msg: Message to print if ping fails.
113
114    Raises:
115      MachineNotPingable:  If machine is not responding to 'ping'
116    """
117    if not machines.MachineIsPingable(machine, logging_level='none'):
118      cros_machine = machine + '.cros'
119      if not machines.MachineIsPingable(cros_machine, logging_level='none'):
120        raise MachineNotPingable(error_msg)
121
122  def GetAllToolchainLabMachines(self):
123    """Gets a list of all the toolchain machines in the ChromeOS HW lab.
124
125    Returns:
126      A list of names of the toolchain machines in the ChromeOS HW lab.
127    """
128    machines_file = os.path.join(os.path.dirname(__file__), 'crosperf',
129                                 'default_remotes')
130    machine_list = []
131    with open(machines_file, 'r') as input_file:
132      lines = input_file.readlines()
133      for line in lines:
134        _, remotes = line.split(':')
135        remotes = remotes.strip()
136        for r in remotes.split():
137          machine_list.append(r.strip())
138    return machine_list
139
140  def GetMachineType(self, m):
141    """Get where the machine is located.
142
143    Args:
144      m: String containing the name or ip address of machine.
145
146    Returns:
147      Value of the type in MachineType Enum.
148    """
149    if m in self.local_machines:
150      return MachineType.LOCAL
151    if m in self.crosfleet_machines:
152      return MachineType.CROSFLEET
153
154  def PrintStatusHeader(self):
155    """Prints the status header lines for machines."""
156    print('\nMachine (Board)\t\t\t\t\tStatus')
157    print('---------------\t\t\t\t\t------')
158
159  def PrintStatus(self, m, state, machine_type):
160    """Prints status for a single machine.
161
162    Args:
163      m: String containing the name or ip address of machine.
164      state: A dictionary of the current state of the machine.
165      machine_type: MachineType to determine where the machine is located.
166    """
167    if state['locked']:
168      print('%s (%s)\t\t%slocked by %s since %s' %
169            (m, state['board'], '\t\t' if machine_type == MachineType.LOCAL
170             else '', state['locked_by'], state['lock_time']))
171    else:
172      print('%s (%s)\t\t%sunlocked' %
173            (m, state['board'],
174             '\t\t' if machine_type == MachineType.LOCAL else ''))
175
176  def AddMachineToLocal(self, machine):
177    """Adds a machine to local machine list.
178
179    Args:
180      machine: The machine to be added.
181    """
182    if machine not in self.local_machines:
183      self.local_machines.append(machine)
184
185  def AddMachineToCrosfleet(self, machine):
186    """Adds a machine to crosfleet machine list.
187
188    Args:
189      machine: The machine to be added.
190    """
191    if machine not in self.crosfleet_machines:
192      self.crosfleet_machines.append(machine)
193
194  def ListMachineStates(self, machine_states):
195    """Gets and prints the current status for a list of machines.
196
197    Prints out the current status for all of the machines in the current
198    LockManager's list of machines (set when the object is initialized).
199
200    Args:
201      machine_states: A dictionary of the current state of every machine in
202        the current LockManager's list of machines.  Normally obtained by
203        calling LockManager::GetMachineStates.
204    """
205    self.PrintStatusHeader()
206    for m in machine_states:
207      machine_type = self.GetMachineType(m)
208      state = machine_states[m]
209      self.PrintStatus(m, state, machine_type)
210
211  def UpdateLockInCrosfleet(self, should_lock_machine, machine):
212    """Ask crosfleet to lease/release a machine.
213
214    Args:
215      should_lock_machine: Boolean indicating whether to lock the machine (True)
216        or unlock the machine (False).
217      machine: The machine to update.
218
219    Returns:
220      True if requested action succeeded, else False.
221    """
222    try:
223      if should_lock_machine:
224        ret = self.LeaseCrosfleetMachine(machine)
225      else:
226        ret = self.ReleaseCrosfleetMachine(machine)
227    except Exception:
228      return False
229    return ret
230
231  def UpdateFileLock(self, should_lock_machine, machine):
232    """Use file lock for local machines,
233
234    Args:
235      should_lock_machine: Boolean indicating whether to lock the machine (True)
236        or unlock the machine (False).
237      machine: The machine to update.
238
239    Returns:
240      True if requested action succeeded, else False.
241    """
242    try:
243      if should_lock_machine:
244        ret = file_lock_machine.Machine(machine, self.locks_dir).Lock(
245            True, sys.argv[0])
246      else:
247        ret = file_lock_machine.Machine(machine, self.locks_dir).Unlock(True)
248    except Exception:
249      return False
250    return ret
251
252  def UpdateMachines(self, lock_machines):
253    """Sets the locked state of the machines to the requested value.
254
255    The machines updated are the ones in self.machines (specified when the
256    class object was intialized).
257
258    Args:
259      lock_machines: Boolean indicating whether to lock the machines (True) or
260        unlock the machines (False).
261
262    Returns:
263      A list of the machines whose state was successfully updated.
264    """
265    updated_machines = []
266    action = 'Locking' if lock_machines else 'Unlocking'
267    for m in self.machines:
268      # TODO(zhizhouy): Handling exceptions with more details when locking
269      # doesn't succeed.
270      machine_type = self.GetMachineType(m)
271      if machine_type == MachineType.CROSFLEET:
272        ret = self.UpdateLockInCrosfleet(lock_machines, m)
273      elif machine_type == MachineType.LOCAL:
274        ret = self.UpdateFileLock(lock_machines, m)
275
276      if ret:
277        self.logger.LogOutput('%s %s machine succeeded: %s.' %
278                              (action, machine_type.value, m))
279        updated_machines.append(m)
280      else:
281        self.logger.LogOutput('%s %s machine failed: %s.' %
282                              (action, machine_type.value, m))
283
284    self.machines = updated_machines
285    return updated_machines
286
287  def _InternalRemoveMachine(self, machine):
288    """Remove machine from internal list of machines.
289
290    Args:
291      machine: Name of machine to be removed from internal list.
292    """
293    # Check to see if machine is lab machine and if so, make sure it has
294    # ".cros" on the end.
295    cros_machine = machine
296    if machine.find('rack') > 0 and machine.find('row') > 0:
297      if machine.find('.cros') == -1:
298        cros_machine = cros_machine + '.cros'
299
300    self.machines = [
301        m for m in self.machines if m not in (cros_machine, machine)
302    ]
303
304  def CheckMachineLocks(self, machine_states, cmd):
305    """Check that every machine in requested list is in the proper state.
306
307    If the cmd is 'unlock' verify that every machine is locked by requestor.
308    If the cmd is 'lock' verify that every machine is currently unlocked.
309
310    Args:
311      machine_states: A dictionary of the current state of every machine in
312        the current LockManager's list of machines.  Normally obtained by
313        calling LockManager::GetMachineStates.
314      cmd: The user-requested action for the machines: 'lock' or 'unlock'.
315
316    Raises:
317      DontOwnLock: The lock on a requested machine is owned by someone else.
318    """
319    for k, state in machine_states.items():
320      if cmd == 'unlock':
321        if not state['locked']:
322          self.logger.LogWarning('Attempt to unlock already unlocked machine '
323                                 '(%s).' % k)
324          self._InternalRemoveMachine(k)
325
326        # TODO(zhizhouy): Crosfleet doesn't support host info such as locked_by.
327        # Need to update this when crosfleet supports it.
328        if (state['locked'] and state['locked_by']
329            and state['locked_by'] != self.user):
330          raise DontOwnLock('Attempt to unlock machine (%s) locked by someone '
331                            'else (%s).' % (k, state['locked_by']))
332      elif cmd == 'lock':
333        if state['locked']:
334          self.logger.LogWarning(
335              'Attempt to lock already locked machine (%s)' % k)
336          self._InternalRemoveMachine(k)
337
338  def GetMachineStates(self, cmd=''):
339    """Gets the current state of all the requested machines.
340
341    Gets the current state of all the requested machines. Stores the data in a
342    dictionary keyed by machine name.
343
344    Args:
345      cmd: The command for which we are getting the machine states. This is
346        important because if one of the requested machines is missing we raise
347        an exception, unless the requested command is 'add'.
348
349    Returns:
350      A dictionary of machine states for all the machines in the LockManager
351      object.
352    """
353    machine_list = {}
354    for m in self.machines:
355      # For local or crosfleet machines, we simply set {'locked': status} for
356      # them
357      # TODO(zhizhouy): This is a quick fix since crosfleet cannot return host
358      # info as afe does. We need to get more info such as locked_by when
359      # crosfleet supports that.
360      values = {
361          'locked': 0 if cmd == 'lock' else 1,
362          'board': '??',
363          'locked_by': '',
364          'lock_time': ''
365      }
366      machine_list[m] = values
367
368    self.ListMachineStates(machine_list)
369
370    return machine_list
371
372  def CheckMachineInCrosfleet(self, machine):
373    """Run command to check if machine is in Crosfleet or not.
374
375    Returns:
376      True if machine in crosfleet, else False
377    """
378    credential = ''
379    if os.path.exists(self.CROSFLEET_CREDENTIAL):
380      credential = '--auth-service-account-json %s' % self.CROSFLEET_CREDENTIAL
381    swarming = os.path.join(self.chromeos_root, self.SWARMING)
382    # TODO(zhizhouy): Swarming script doesn't support python3 so explicitly
383    # launch it with python2 until migrated.
384    cmd = (('python2 %s '
385            'query --swarming https://chromeos-swarming.appspot.com '
386            "%s 'bots/list?is_dead=FALSE&dimensions=dut_name:%s'") %
387           (swarming, credential, machine.rstrip('.cros')))
388    exit_code, stdout, stderr = self.ce.RunCommandWOutput(cmd)
389    if exit_code:
390      raise ValueError('Querying bots failed (2); stdout: %r; stderr: %r' %
391                       (stdout, stderr))
392
393    # The command will return a json output as stdout. If machine not in
394    # crosfleet, stdout will look like this:
395    #  {
396    #    "death_timeout": "600",
397    #    "now": "TIMESTAMP"
398    #  }
399    # Otherwise there will be a tuple starting with 'items', we simply detect
400    # this keyword for result.
401    return 'items' in stdout
402
403  def LeaseCrosfleetMachine(self, machine):
404    """Run command to lease dut from crosfleet.
405
406    Returns:
407      True if succeeded, False if failed.
408    """
409    credential = ''
410    if os.path.exists(self.CROSFLEET_CREDENTIAL):
411      credential = '-service-account-json %s' % self.CROSFLEET_CREDENTIAL
412    cmd = (('%s dut lease -minutes %s %s %s %s') %
413           (self.CROSFLEET_PATH, self.LEASE_MINS, credential, '-host'
414            if '.cros' in machine else '-board', machine.rstrip('.cros')))
415    # Wait 8 minutes for server to start the lease task, if not started,
416    # we will treat it as unavailable.
417    check_interval_time = 480
418    retval = self.ce.RunCommand(cmd, command_timeout=check_interval_time)
419    return retval == self.SUCCESS
420
421  def ReleaseCrosfleetMachine(self, machine):
422    """Run command to release dut from crosfleet.
423
424    Returns:
425      True if succeeded, False if failed.
426    """
427    credential = ''
428    if os.path.exists(self.CROSFLEET_CREDENTIAL):
429      credential = '-service-account-json %s' % self.CROSFLEET_CREDENTIAL
430    cmd = (('%s dut abandon %s %s') %
431           (self.CROSFLEET_PATH, credential, machine.rstrip('.cros')))
432    retval = self.ce.RunCommand(cmd)
433    return retval == self.SUCCESS
434
435
def Main(argv):
  """Parse the options, initialize lock manager and dispatch proper method.

  Invalid option combinations are reported through parser.error(), which
  exits the process.

  Args:
    argv: The options with which this script was invoked.

  Returns:
    0 unless an exception is raised.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument('--list',
                      dest='cmd',
                      action='store_const',
                      const='status',
                      help='List current status of all known machines.')
  parser.add_argument('--lock',
                      dest='cmd',
                      action='store_const',
                      const='lock',
                      help='Lock given machine(s).')
  parser.add_argument('--unlock',
                      dest='cmd',
                      action='store_const',
                      const='unlock',
                      help='Unlock given machine(s).')
  parser.add_argument('--status',
                      dest='cmd',
                      action='store_const',
                      const='status',
                      help='List current status of given machine(s).')
  parser.add_argument('--remote',
                      dest='remote',
                      help='machines on which to operate')
  parser.add_argument('--chromeos_root',
                      dest='chromeos_root',
                      required=True,
                      help='ChromeOS root to use for autotest scripts.')
  parser.add_argument('--force',
                      dest='force',
                      action='store_true',
                      default=False,
                      help='Force lock/unlock of machines, even if not'
                      ' current lock owner.')

  options = parser.parse_args(argv)

  # Report the missing operation first; otherwise an invocation without any
  # operation is misreported as "No machines specified".
  if not options.cmd:
    parser.error('No operation selected (--list, --status, --lock, '
                 '--unlock).')

  if not options.remote and options.cmd != 'status':
    parser.error('No machines specified for operation.')

  if not os.path.isdir(options.chromeos_root):
    parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

  machine_list = options.remote.split() if options.remote else []

  lock_manager = LockManager(machine_list, options.force,
                             options.chromeos_root)

  # GetMachineStates() already prints the status table, so the 'status'
  # command needs no further action; listing again would print it twice.
  machine_states = lock_manager.GetMachineStates(cmd=options.cmd)
  cmd = options.cmd

  if cmd in ('lock', 'unlock'):
    # --force only skips the ownership/state check; the update itself must
    # still be performed.
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    # NOTE(review): nothing visible here registers the machines as local or
    # crosfleet before this call -- confirm how they are meant to be
    # classified when the script is run from the command line.
    lock_manager.UpdateMachines(cmd == 'lock')

  return 0
517
518
# Run the command-line entry point only when this file is executed as a
# script; the value returned by Main() becomes the process exit status.
if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))
521