• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright 2019 The Chromium OS Authors. All rights reserved.
5# Use of this source code is governed by a BSD-style license that can be
6# found in the LICENSE file.
7
8"""This module controls locking and unlocking of test machines."""
9
10from __future__ import print_function
11
12import argparse
13import enum
14import getpass
15import os
16import sys
17
18import file_lock_machine
19
20from cros_utils import command_executer
21from cros_utils import logger
22from cros_utils import machines
23
24
class LockException(Exception):
  """Common ancestor of the exceptions defined in this module.

  Catching this type also catches MachineNotPingable, LockingError and
  DontOwnLock, which all derive from it.
  """
27
28
class MachineNotPingable(LockException):
  """Signals that a machine did not answer a ping.

  Raised by LockManager.CheckMachine when neither the machine name nor its
  '.cros' variant is pingable.
  """
31
32
class LockingError(LockException):
  """Signals that the server could not lock/unlock a machine as requested."""
35
36
class DontOwnLock(LockException):
  """Raised when a user attempts to unlock a machine locked by someone else.

  This should not be raised if the user specified '--force'.
  """
40
41
class MachineType(enum.Enum):
  """Where a machine lives, which decides how it gets locked.

  The values are the lower-case names that are interpolated into log messages.
  """
  # Machine handled with a file lock.
  LOCAL = 'local'
  # Machine handled by leasing/releasing it through skylab.
  SKYLAB = 'skylab'
46
47
48class LockManager(object):
  """Class for locking/unlocking machines via different modes.
50
51  This class contains methods for checking the locked status of machines,
52  and for changing the locked status.  It handles HW lab machines and local
53  machines, using appropriate locking mechanisms for each.
54  """
55
56  SKYLAB_PATH = 'skylab'
57
  # TODO(zhizhouy): lease time may need to be dynamically adjusted. For now we
59  # set it long enough to cover the period to finish nightly rotation tests.
60  LEASE_MINS = 1439
61
62  SKYLAB_CREDENTIAL = ('/usr/local/google/home/mobiletc-prebuild'
63                       '/sheriff_utils/credentials/skylab'
64                       '/chromeos-swarming-credential.json')
65  SWARMING = 'chromite/third_party/swarming.client/swarming.py'
66  SUCCESS = 0
67
68  def __init__(self,
69               remotes,
70               force_option,
71               chromeos_root,
72               locks_dir='',
73               log=None):
74    """Initializes an LockManager object.
75
76    Args:
77      remotes: A list of machine names or ip addresses to be managed.  Names
78        and ip addresses should be represented as strings.  If the list is
79        empty, the lock manager will get all known machines.
80      force_option: A Boolean indicating whether or not to force an unlock of
81        a machine that was locked by someone else.
82      chromeos_root: The ChromeOS chroot to use for the autotest scripts.
83      locks_dir: A directory used for file locking local devices.
84      log: If not None, this is the logger object to be used for writing out
85        informational output messages.  It is expected to be an instance of
86        Logger class from cros_utils/logger.py.
87    """
88    self.chromeos_root = chromeos_root
89    self.user = getpass.getuser()
90    self.logger = log or logger.GetLogger()
91    self.ce = command_executer.GetCommandExecuter(self.logger)
92
93    sys.path.append(chromeos_root)
94
95    self.locks_dir = locks_dir
96
97    self.machines = list(set(remotes)) or []
98    self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
99
100    if not self.machines:
101      self.machines = self.toolchain_lab_machines
102    self.force = force_option
103
104    self.local_machines = []
105    self.skylab_machines = []
106
107  def CheckMachine(self, machine, error_msg):
108    """Verifies that machine is responding to ping.
109
110    Args:
111      machine: String containing the name or ip address of machine to check.
112      error_msg: Message to print if ping fails.
113
114    Raises:
115      MachineNotPingable:  If machine is not responding to 'ping'
116    """
117    if not machines.MachineIsPingable(machine, logging_level='none'):
118      cros_machine = machine + '.cros'
119      if not machines.MachineIsPingable(cros_machine, logging_level='none'):
120        raise MachineNotPingable(error_msg)
121
122  def GetAllToolchainLabMachines(self):
123    """Gets a list of all the toolchain machines in the ChromeOS HW lab.
124
125    Returns:
126      A list of names of the toolchain machines in the ChromeOS HW lab.
127    """
128    machines_file = os.path.join(
129        os.path.dirname(__file__), 'crosperf', 'default_remotes')
130    machine_list = []
131    with open(machines_file, 'r') as input_file:
132      lines = input_file.readlines()
133      for line in lines:
134        _, remotes = line.split(':')
135        remotes = remotes.strip()
136        for r in remotes.split():
137          machine_list.append(r.strip())
138    return machine_list
139
140  def GetMachineType(self, m):
141    """Get where the machine is located.
142
143    Args:
144      m: String containing the name or ip address of machine.
145
146    Returns:
147      Value of the type in MachineType Enum.
148    """
149    if m in self.local_machines:
150      return MachineType.LOCAL
151    if m in self.skylab_machines:
152      return MachineType.SKYLAB
153
154  def PrintStatusHeader(self):
155    """Prints the status header lines for machines."""
156    print('\nMachine (Board)\t\t\t\t\tStatus')
157    print('---------------\t\t\t\t\t------')
158
159  def PrintStatus(self, m, state, machine_type):
160    """Prints status for a single machine.
161
162    Args:
163      m: String containing the name or ip address of machine.
164      state: A dictionary of the current state of the machine.
165      machine_type: MachineType to determine where the machine is located.
166    """
167    if state['locked']:
168      print('%s (%s)\t\t%slocked by %s since %s' %
169            (m, state['board'], '\t\t' if machine_type == MachineType.LOCAL else
170             '', state['locked_by'], state['lock_time']))
171    else:
172      print(
173          '%s (%s)\t\t%sunlocked' % (m, state['board'], '\t\t' if
174                                     machine_type == MachineType.LOCAL else ''))
175
176  def AddMachineToLocal(self, machine):
177    """Adds a machine to local machine list.
178
179    Args:
180      machine: The machine to be added.
181    """
182    if machine not in self.local_machines:
183      self.local_machines.append(machine)
184
185  def AddMachineToSkylab(self, machine):
186    """Adds a machine to skylab machine list.
187
188    Args:
189      machine: The machine to be added.
190    """
191    if machine not in self.skylab_machines:
192      self.skylab_machines.append(machine)
193
194  def ListMachineStates(self, machine_states):
195    """Gets and prints the current status for a list of machines.
196
197    Prints out the current status for all of the machines in the current
198    LockManager's list of machines (set when the object is initialized).
199
200    Args:
201      machine_states: A dictionary of the current state of every machine in
202        the current LockManager's list of machines.  Normally obtained by
203        calling LockManager::GetMachineStates.
204    """
205    self.PrintStatusHeader()
206    for m in machine_states:
207      machine_type = self.GetMachineType(m)
208      state = machine_states[m]
209      self.PrintStatus(m, state, machine_type)
210
211  def UpdateLockInSkylab(self, should_lock_machine, machine):
212    """Ask skylab to lease/release a machine.
213
214    Args:
215      should_lock_machine: Boolean indicating whether to lock the machine (True)
216        or unlock the machine (False).
217      machine: The machine to update.
218
219    Returns:
220      True if requested action succeeded, else False.
221    """
222    try:
223      if should_lock_machine:
224        ret = self.LeaseSkylabMachine(machine)
225      else:
226        ret = self.ReleaseSkylabMachine(machine)
227    except Exception:
228      return False
229    return ret
230
231  def UpdateFileLock(self, should_lock_machine, machine):
232    """Use file lock for local machines,
233
234    Args:
235      should_lock_machine: Boolean indicating whether to lock the machine (True)
236        or unlock the machine (False).
237      machine: The machine to update.
238
239    Returns:
240      True if requested action succeeded, else False.
241    """
242    try:
243      if should_lock_machine:
244        ret = file_lock_machine.Machine(machine, self.locks_dir).Lock(
245            True, sys.argv[0])
246      else:
247        ret = file_lock_machine.Machine(machine, self.locks_dir).Unlock(True)
248    except Exception:
249      return False
250    return ret
251
252  def UpdateMachines(self, lock_machines):
253    """Sets the locked state of the machines to the requested value.
254
255    The machines updated are the ones in self.machines (specified when the
256    class object was intialized).
257
258    Args:
259      lock_machines: Boolean indicating whether to lock the machines (True) or
260        unlock the machines (False).
261
262    Returns:
263      A list of the machines whose state was successfully updated.
264    """
265    updated_machines = []
266    action = 'Locking' if lock_machines else 'Unlocking'
267    for m in self.machines:
268      # TODO(zhizhouy): Handling exceptions with more details when locking
269      # doesn't succeed.
270      machine_type = self.GetMachineType(m)
271      if machine_type == MachineType.SKYLAB:
272        ret = self.UpdateLockInSkylab(lock_machines, m)
273      elif machine_type == MachineType.LOCAL:
274        ret = self.UpdateFileLock(lock_machines, m)
275
276      if ret:
277        self.logger.LogOutput(
278            '%s %s machine succeeded: %s.' % (action, machine_type.value, m))
279        updated_machines.append(m)
280      else:
281        self.logger.LogOutput(
282            '%s %s machine failed: %s.' % (action, machine_type.value, m))
283
284    self.machines = updated_machines
285    return updated_machines
286
287  def _InternalRemoveMachine(self, machine):
288    """Remove machine from internal list of machines.
289
290    Args:
291      machine: Name of machine to be removed from internal list.
292    """
293    # Check to see if machine is lab machine and if so, make sure it has
294    # ".cros" on the end.
295    cros_machine = machine
296    if machine.find('rack') > 0 and machine.find('row') > 0:
297      if machine.find('.cros') == -1:
298        cros_machine = cros_machine + '.cros'
299
300    self.machines = [
301        m for m in self.machines if m not in (cros_machine, machine)
302    ]
303
304  def CheckMachineLocks(self, machine_states, cmd):
305    """Check that every machine in requested list is in the proper state.
306
307    If the cmd is 'unlock' verify that every machine is locked by requestor.
308    If the cmd is 'lock' verify that every machine is currently unlocked.
309
310    Args:
311      machine_states: A dictionary of the current state of every machine in
312        the current LockManager's list of machines.  Normally obtained by
313        calling LockManager::GetMachineStates.
314      cmd: The user-requested action for the machines: 'lock' or 'unlock'.
315
316    Raises:
317      DontOwnLock: The lock on a requested machine is owned by someone else.
318    """
319    for k, state in machine_states.items():
320      if cmd == 'unlock':
321        if not state['locked']:
322          self.logger.LogWarning('Attempt to unlock already unlocked machine '
323                                 '(%s).' % k)
324          self._InternalRemoveMachine(k)
325
326        # TODO(zhizhouy): Skylab doesn't support host info such as locked_by.
327        # Need to update this when skylab supports it.
328        if (state['locked'] and state['locked_by'] and
329            state['locked_by'] != self.user):
330          raise DontOwnLock('Attempt to unlock machine (%s) locked by someone '
331                            'else (%s).' % (k, state['locked_by']))
332      elif cmd == 'lock':
333        if state['locked']:
334          self.logger.LogWarning(
335              'Attempt to lock already locked machine (%s)' % k)
336          self._InternalRemoveMachine(k)
337
338  def GetMachineStates(self, cmd=''):
339    """Gets the current state of all the requested machines.
340
341    Gets the current state of all the requested machines. Stores the data in a
342    dictionary keyed by machine name.
343
344    Args:
345      cmd: The command for which we are getting the machine states. This is
346        important because if one of the requested machines is missing we raise
347        an exception, unless the requested command is 'add'.
348
349    Returns:
350      A dictionary of machine states for all the machines in the LockManager
351      object.
352    """
353    machine_list = {}
354    for m in self.machines:
355      # For local or skylab machines, we simply set {'locked': status} for them
356      # TODO(zhizhouy): This is a quick fix since skylab cannot return host info
357      # as afe does. We need to get more info such as locked_by when skylab
358      # supports that.
359      values = {
360          'locked': 0 if cmd == 'lock' else 1,
361          'board': '??',
362          'locked_by': '',
363          'lock_time': ''
364      }
365      machine_list[m] = values
366
367    self.ListMachineStates(machine_list)
368
369    return machine_list
370
371  def CheckMachineInSkylab(self, machine):
372    """Run command to check if machine is in Skylab or not.
373
374    Returns:
375      True if machine in skylab, else False
376    """
377    credential = ''
378    if os.path.exists(self.SKYLAB_CREDENTIAL):
379      credential = '--auth-service-account-json %s' % self.SKYLAB_CREDENTIAL
380    swarming = os.path.join(self.chromeos_root, self.SWARMING)
381    # TODO(zhizhouy): Swarming script doesn't support python3 so explicitly
382    # launch it with python2 until migrated.
383    cmd = (('python2 %s ' \
384            'query --swarming https://chromeos-swarming.appspot.com ' \
385            "%s 'bots/list?is_dead=FALSE&dimensions=dut_name:%s'") % \
386           (swarming,
387            credential,
388            machine.rstrip('.cros')))
389    exit_code, stdout, stderr = self.ce.RunCommandWOutput(cmd)
390    if exit_code:
391      raise ValueError(
392          'Querying bots failed (2); stdout: %r; stderr: %r' % (stdout, stderr))
393
394    # The command will return a json output as stdout. If machine not in skylab
395    # stdout will look like this:
396    #  {
397    #    "death_timeout": "600",
398    #    "now": "TIMESTAMP"
399    #  }
400    # Otherwise there will be a tuple starting with 'items', we simply detect
401    # this keyword for result.
402    return 'items' in stdout
403
404  def LeaseSkylabMachine(self, machine):
405    """Run command to lease dut from skylab.
406
407    Returns:
408      True if succeeded, False if failed.
409    """
410    credential = ''
411    if os.path.exists(self.SKYLAB_CREDENTIAL):
412      credential = '-service-account-json %s' % self.SKYLAB_CREDENTIAL
413    cmd = (('%s lease-dut -minutes %s %s %s') % \
414           (self.SKYLAB_PATH,
415            self.LEASE_MINS,
416            credential,
417            machine.rstrip('.cros')))
418    # Wait 120 seconds for server to start the lease task, if not started,
419    # we will treat it as unavailable.
420    check_interval_time = 120
421    retval = self.ce.RunCommand(cmd, command_timeout=check_interval_time)
422    return retval == self.SUCCESS
423
424  def ReleaseSkylabMachine(self, machine):
425    """Run command to release dut from skylab.
426
427    Returns:
428      True if succeeded, False if failed.
429    """
430    credential = ''
431    if os.path.exists(self.SKYLAB_CREDENTIAL):
432      credential = '-service-account-json %s' % self.SKYLAB_CREDENTIAL
433    cmd = (('%s release-dut %s %s') % \
434           (self.SKYLAB_PATH,
435            credential,
436            machine.rstrip('.cros')))
437    retval = self.ce.RunCommand(cmd)
438    return retval == self.SUCCESS
439
440
def Main(argv):
  """Parse the options, initialize lock manager and dispatch proper method.

  Args:
    argv: The options with which this script was invoked.

  Returns:
    0 unless an exception is raised.

  Raises:
    SystemExit: Raised through parser.error() when no operation is selected,
      when machines are required but not given, or when chromeos_root is not
      a directory.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '--list',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of all known machines.')
  parser.add_argument(
      '--lock',
      dest='cmd',
      action='store_const',
      const='lock',
      help='Lock given machine(s).')
  parser.add_argument(
      '--unlock',
      dest='cmd',
      action='store_const',
      const='unlock',
      help='Unlock given machine(s).')
  parser.add_argument(
      '--status',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of given machine(s).')
  parser.add_argument(
      '--remote', dest='remote', help='machines on which to operate')
  parser.add_argument(
      '--chromeos_root',
      dest='chromeos_root',
      required=True,
      help='ChromeOS root to use for autotest scripts.')
  parser.add_argument(
      '--force',
      dest='force',
      action='store_true',
      default=False,
      help='Force lock/unlock of machines, even if not'
      ' current lock owner.')

  options = parser.parse_args(argv)

  # Check the operation first, so that a missing operation is not misreported
  # as missing machines.  Only options that actually exist are listed.
  if not options.cmd:
    parser.error('No operation selected (--list, --status, --lock, --unlock).')

  if not options.remote and options.cmd != 'status':
    parser.error('No machines specified for operation.')

  if not os.path.isdir(options.chromeos_root):
    parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

  machine_list = options.remote.split() if options.remote else []

  lock_manager = LockManager(machine_list, options.force, options.chromeos_root)

  # NOTE(review): nothing here registers the machines through
  # AddMachineToLocal/AddMachineToSkylab, so GetMachineType() returns None for
  # all of them -- confirm whether this entry point should classify them.

  # GetMachineStates() already prints the status table, so the 'status'
  # command needs no further work; printing again would duplicate the table.
  machine_states = lock_manager.GetMachineStates(cmd=options.cmd)
  cmd = options.cmd

  if cmd in ('lock', 'unlock'):
    # --force only skips the ownership/state checks; the update itself must
    # still run, otherwise --force would turn the command into a no-op.
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(cmd == 'lock')

  return 0
526
527
if __name__ == '__main__':
  # Script entry point: forward the CLI arguments (without the program name)
  # to Main and use its return value as the process exit status.
  exit_status = Main(sys.argv[1:])
  sys.exit(exit_status)
530