# Lint as: python2, python3
# Copyright (c) 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This is a base host class for servohost and labstation."""


import six.moves.http_client
import logging
import socket
import six.moves.xmlrpc_client
import time
import os

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import autotest_enum
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import kernel_utils
from autotest_lib.client.cros import constants as client_constants
from autotest_lib.server import autotest
from autotest_lib.server import site_utils as server_utils
from autotest_lib.server.cros import provisioner
from autotest_lib.server.hosts import ssh_host
from autotest_lib.site_utils.rpm_control_system import rpm_client


class BaseServoHost(ssh_host.SSHHost):
    """Base host class for a host that manages servo(s).
    E.g. beaglebone, labstation.
    """
    # Try a graceful reboot first (in the background); if the host is still
    # up 10 seconds later, force an immediate reboot with `reboot -f`.
    REBOOT_CMD = 'sleep 5; reboot & sleep 10; reboot -f'

    # Directory on the servohost holding servod-related state files.
    TEMP_FILE_DIR = '/var/lib/servod/'

    # Suffixes for marker files (presumably created under TEMP_FILE_DIR by
    # callers elsewhere in the project) that flag a servo as in use or as
    # needing a reboot. Not referenced within this class.
    LOCK_FILE_POSTFIX = '_in_use'
    REBOOT_FILE_POSTFIX = '_reboot'

    # Time to wait a rebooting servohost, in seconds.
    REBOOT_TIMEOUT = 240

    # Timeout value to power cycle a servohost, in seconds.
    BOOT_TIMEOUT = 240

    # Constants that reflect current host update state.
    UPDATE_STATE = autotest_enum.AutotestEnum('IDLE', 'RUNNING',
                                              'PENDING_REBOOT')

    def _initialize(self, hostname, is_in_lab=None, *args, **dargs):
        """Construct a BaseServoHost object.

        @param hostname: Hostname of the servo host; 'localhost' means the
                         servo is attached to the local machine.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(BaseServoHost, self)._initialize(hostname=hostname,
                                               *args, **dargs)
        self._is_localhost = (self.hostname == 'localhost')
        if self._is_localhost:
            # A localhost servo is by definition not a lab device.
            self._is_in_lab = False
        elif is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab

        # Commands on the servo host must be run by the superuser.
        # Our account on a remote host is root, but if our target is
        # localhost then we might be running unprivileged.  If so,
        # `sudo` will have to be added to the commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False

        # Lazily computed by is_labstation(); None means "not yet checked".
        self._is_labstation = None
        # HostInfo/hostname of the DUT attached to this servo; set later via
        # the set_dut_host_info()/set_dut_hostname() setters.
        self._dut_host_info = None
        self._dut_hostname = None


    def get_board(self):
        """Determine the board for this servo host. E.g. fizz-labstation

        @returns a string representing this labstation's board or None if
            target host is not using a ChromeOS image(e.g. test in chroot).
        """
        output = self.run('cat /etc/lsb-release', ignore_status=True).stdout
        return lsbrelease_utils.get_current_board(lsb_release_content=output)


    def set_dut_host_info(self, dut_host_info):
        """Remember the host info of the DUT attached to this servo host.

        @param dut_host_info: A HostInfo object.
        """
        logging.info('setting dut_host_info field to (%s)', dut_host_info)
        self._dut_host_info = dut_host_info


    def get_dut_host_info(self):
        """Return the host info of the attached DUT, if it has been set.

        @return A HostInfo object, or None if set_dut_host_info() was never
                called.
        """
        return self._dut_host_info


    def set_dut_hostname(self, dut_hostname):
        """Remember the hostname of the DUT attached to this servo host.

        @param dut_hostname: hostname of the DUT that connected to this servo.
        """
        logging.info('setting dut_hostname as (%s)', dut_hostname)
        self._dut_hostname = dut_hostname


    def get_dut_hostname(self):
        """Return the attached DUT's hostname, if it has been set.

        @returns hostname of the DUT that connected to this servo, or None if
                 set_dut_hostname() was never called.
        """
        return self._dut_hostname


    def is_labstation(self):
        """Determine if the host is a labstation

        The result is computed once from the board name and cached for
        subsequent calls.

        @returns True if this host is a labstation otherwise False.
        """
        if self._is_labstation is None:
            board = self.get_board()
            self._is_labstation = board is not None and 'labstation' in board

        return self._is_labstation


    def _get_lsb_release_content(self):
        """Return the content of lsb-release file of host."""
        return self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION(e.g. 12900.0.0). None on fail.
        """
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=self._get_lsb_release_content()
        )


    def get_full_release_path(self):
        """Get full release path from servohost as string.

        @returns full release path as a string
                 (e.g. fizz-labstation-release/R82.12900.0.0). None on fail.
        """
        return lsbrelease_utils.get_chromeos_release_builder_path(
                lsb_release_content=self._get_lsb_release_content()
        )


    def _check_update_status(self):
        """Check the servohost's current update state.

        @returns: one of the states from self.UPDATE_STATE:
            IDLE -- if the target host is not currently updating and not
                pending on a reboot.
            RUNNING -- if there is another updating process that running on
                target host(note: we don't expect to hit this scenario).
            PENDING_REBOOT -- if the target host had an update and pending
                on reboot.
        """
        # `grep -v $$` excludes the pid of the shell running this pipeline,
        # since `pgrep -f` can match our own command line.
        result = self.run('pgrep -f quick-provision | grep -v $$',
                          ignore_status=True)
        # We don't expect any output unless there are another quick
        # provision process is running.
        if result.exit_status == 0:
            return self.UPDATE_STATE.RUNNING

        # Determine if we have an update that pending on reboot by check if
        # the current inactive kernel has priority for the next boot.
        try:
            inactive_kernel = kernel_utils.get_kernel_state(self)[1]
            next_kernel = kernel_utils.get_next_kernel(self)
            if inactive_kernel == next_kernel:
                return self.UPDATE_STATE.PENDING_REBOOT
        except Exception as e:
            # Best-effort check: kernel-info failures degrade to IDLE rather
            # than failing the caller.
            logging.error('Unexpected error while checking kernel info; %s', e)
        return self.UPDATE_STATE.IDLE


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
                 False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            # Treat connection/command failure as "not enough information".
            return False
        return result.exit_status == 0


    def prepare_for_update(self):
        """Prepares the DUT for an update.

        Subclasses may override this to perform any special actions
        required before updating.  The base implementation is a no-op.
        """
        pass


    def reboot(self, *args, **dargs):
        """Reboot using special servo host reboot command."""
        super(BaseServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
                                          *args, **dargs)


    def update_image(self, stable_version=None):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing an update do nothing.
          * If the host has an update that pending on reboot, do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, start an update.

        @param stable_version: the target build number.(e.g. R82-12900.0.0)

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        """
        # servod could be running in a Ubuntu workstation.
        if not self.is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        if not stable_version:
            logging.debug("BaseServoHost::update_image attempting to get"
                          " servo cros stable version")
            try:
                stable_version = (self.get_dut_host_info().
                                  servo_cros_stable_version)
            except AttributeError:
                # get_dut_host_info() may return None, or the HostInfo may
                # lack the attribute.
                # NOTE(review): on this path stable_version stays None and
                # target_build below becomes '<board>-release/None' -- verify
                # that callers always provide either a stable_version or a
                # populated dut_host_info.
                logging.error("BaseServoHost::update_image failed to get"
                              " servo cros stable version.")

        target_build = "%s-release/%s" % (self.get_board(), stable_version)
        # Index 3 of ParseBuildName() is the build version string.
        target_build_number = server_utils.ParseBuildName(
                target_build)[3]
        current_build_number = self.get_release_version()

        if current_build_number == target_build_number:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            return

        status = self._check_update_status()
        if status == self.UPDATE_STATE.RUNNING:
            logging.info('servo host %s already processing an update',
                         self.hostname)
            return
        if status == self.UPDATE_STATE.PENDING_REBOOT:
            # Labstation reboot is handled separately here as it require
            # synchronized reboot among all managed DUTs. For servo_v3, we'll
            # reboot when initialize Servohost, if there is a update pending.
            logging.info('An update has been completed and pending reboot.')
            return

        ds = dev_server.ImageServer.resolve(self.hostname,
                                            hostname=self.hostname)
        url = ds.get_update_url(target_build)
        cros_provisioner = provisioner.ChromiumOSProvisioner(update_url=url,
                                                             host=self,
                                                             is_servohost=True)
        logging.info('Using devserver url: %s to trigger update on '
                     'servo host %s, from %s to %s', url, self.hostname,
                     current_build_number, target_build_number)
        cros_provisioner.run_provision()


    def has_power(self):
        """Return whether or not the servo host is powered by PoE or RPM."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def _post_update_reboot(self):
        """Reboot servohost after a quick provision.

        We need to do some special cleanup before and after reboot
        when there is an update pending.
        """
        # Regarding the 'crossystem' command below: In some cases,
        # the update flow puts the TPM into a state such that it
        # fails verification.  We don't know why.  However, this
        # call papers over the problem by clearing the TPM during
        # the reboot.
        #
        # We ignore failures from 'crossystem'.  Although failure
        # here is unexpected, and could signal a bug, the point of
        # the exercise is to paper over problems; allowing this to
        # fail would defeat the purpose.

        # Preserve critical files before reboot since post-provision
        # clobbering will wipe the stateful partition.
        # TODO(xianuowang@) Remove this logic once we have updated to
        # a image with https://crrev.com/c/2485908.
        path_to_preserve = [
                '/var/lib/servod',
                '/var/lib/device_health_profile',
        ]
        safe_location = '/mnt/stateful_partition/unencrypted/preserve/'
        for item in path_to_preserve:
            dest = os.path.join(safe_location, item.split('/')[-1])
            # Remove any stale copy so `mv` lands at the expected path.
            self.run('rm -rf %s' % dest, ignore_status=True)
            self.run('mv %s %s' % (item, safe_location), ignore_status=True)

        self.run('crossystem clear_tpm_owner_request=1', ignore_status=True)
        self._servo_host_reboot()
        logging.debug('Cleaning up autotest directories if exist.')
        try:
            installed_autodir = autotest.Autotest.get_installed_autodir(self)
            self.run('rm -rf ' + installed_autodir)
        except autotest.AutodirNotFoundError:
            logging.debug('No autotest installed directory found.')

        # Recover preserved files to original location.
        # TODO(xianuowang@) Remove this logic once we have updated to
        # a image with https://crrev.com/c/2485908.
        for item in path_to_preserve:
            src = os.path.join(safe_location, item.split('/')[-1])
            # Parent directory of the original path.
            dest = '/'.join(item.split('/')[:-1])
            self.run('mv %s %s' % (src, dest), ignore_status=True)

    def power_cycle(self):
        """Cycle power to this host via PoE(servo v3) or RPM(labstation)
        if it is a lab device.

        @raises AutoservRepairError if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self, 'CYCLE')
            except (socket.error, six.moves.xmlrpc_client.Error,
                    six.moves.http_client.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise hosts.AutoservRepairError(
                        'Power cycling %s failed: %s' % (self.hostname, e),
                        'power_cycle_via_rpm_failed'
                )
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _servo_host_reboot(self):
        """Reboot this servo host because a reboot is requested."""
        logging.info('Rebooting servo host %s from build %s', self.hostname,
                     self.get_release_version())
        # Tell the reboot() call not to wait for completion.
        # Otherwise, the call will log reboot failure if servo does
        # not come back.  The logged reboot failure will lead to
        # test job failure.  If the test does not require servo, we
        # don't want servo failure to fail the test with error:
        # `Host did not return from reboot` in status.log.
        self.reboot(fastsync=True, wait=False)

        # We told the reboot() call not to wait, but we need to wait
        # for the reboot before we continue.  Alas.  The code from
        # here below is basically a copy of Host.wait_for_restart(),
        # with the logging bits ripped out, so that they can't cause
        # the failure logging problem described above.
        #
        # The black stain that this has left on my soul can never be
        # erased.
        old_boot_id = self.get_boot_id()
        if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                              warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                              old_boot_id=old_boot_id):
            raise error.AutoservHostError(
                    'servo host %s failed to shut down.' %
                    self.hostname)
        if self.wait_up(timeout=self.REBOOT_TIMEOUT):
            logging.info('servo host %s back from reboot, with build %s',
                         self.hostname, self.get_release_version())
        else:
            raise error.AutoservHostError(
                    'servo host %s failed to come back from reboot.' %
                    self.hostname)


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None,
                         alive_count_max=None, connection_attempts=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
            connection failure.  Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
            180 seconds.  In conjunction with ServerAliveCountMax ensures
            that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectAttempts=4; reduce flakiness in connection errors;
            consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.
        @param alive_count_max Ignored.
        @param connection_attempts Ignored.

        @returns: An ssh command with the requested settings.

        """
        options = ' '.join([opts, '-o Protocol=2'])
        return super(BaseServoHost, self).make_ssh_command(
                user=user, port=port, opts=options, hosts_file='/dev/null',
                connect_timeout=30, alive_interval=180, alive_count_max=3,
                connection_attempts=4)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it.  Remote paths must be
        pre-encoded.  Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
            NOTE(review): the format string below interpolates this with
            '%s', so callers presumably pass a pre-joined string of paths --
            confirm against AbstractSSHHost's call site.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self._master_ssh.ssh_option,
                          self.port, sources, dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, ssh_failure_retry_ok=False,
            options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost.  If the servo host is a remote device,
        it will call `run` in SSHost without changing anything.
        If the servo host is 'localhost', it will call utils.system_output.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process.  The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param ssh_failure_retry_ok: when True and ssh connection failure is
                                     suspected, OK to retry command (but not
                                     compulsory, and likely not needed here)
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if
                     necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {
                'command': command,
                'timeout': timeout,
                'ignore_status': ignore_status,
                'stdout_tee': stdout_tee,
                'stderr_tee': stderr_tee,
                # connect_timeout n/a for localhost
                # options n/a for localhost
                # ssh_failure_retry_ok n/a for localhost
                'stdin': stdin,
                'verbose': verbose,
                'args': args,
        }
        if self.is_localhost():
            if self._sudo_required:
                # Wrap in `sudo -n sh -c` so the whole (escaped) command
                # runs with root privileges without prompting for a password.
                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
                        command)
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                logging.error(e)
                # Normalize the local failure to the same exception type the
                # remote (SSH) path raises.
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            run_args['ssh_failure_retry_ok'] = ssh_failure_retry_ok
            return super(BaseServoHost, self).run(**run_args)

    def _mount_drive(self, src_path, dst_path):
        """Mount an external drive read-only on the servohost.

        @param src_path: the drive path to mount(e.g. /dev/sda3).
        @param dst_path: the destination directory on servohost to mount
                         the drive.

        @returns: True if mount success otherwise False.
        """
        # Make sure the dst dir exists.
        self.run('mkdir -p %s' % dst_path)

        result = self.run('mount -o ro %s %s' % (src_path, dst_path),
                          ignore_status=True)
        return result.exit_status == 0

    def _unmount_drive(self, mount_path):
        """Unmount a drive from servohost.

        @param mount_path: the path on servohost to unmount.

        @returns: True if unmount success otherwise False.
        """
        result = self.run('umount %s' % mount_path, ignore_status=True)
        return result.exit_status == 0

    def wait_ready(self, required_uptime=300):
        """Wait ready for a servohost if it has been rebooted recently.

        It may take a few minutes until all servos and their components
        re-enumerated and become ready after a servohost(especially labstation
        as it supports multiple servos) reboot, so we need to make sure the
        servohost has been up for a given amount of time before trying to
        start any actions.

        @param required_uptime: Minimum uptime in seconds that we can
                                consider a servohost be ready.
        """
        uptime = float(self.check_uptime())
        # To prevent unexpected output from check_uptime() that causes long
        # sleep, make sure the maximum wait time <= required_uptime.
        diff = min(required_uptime - uptime, required_uptime)
        if diff > 0:
            logging.info(
                    'The servohost was just rebooted, wait %s'
                    ' seconds for it to become ready', diff)
            time.sleep(diff)