# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""


import httplib
import logging
import socket
import xmlrpclib

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_states
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib.cros import autoupdater
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.cros import constants as client_constants
from autotest_lib.server import afe_utils
from autotest_lib.server import site_utils as server_site_utils
from autotest_lib.server.cros import dnsname_mangler
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.server.cros.dynamic_suite import control_file_getter
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import servo_repair
from autotest_lib.server.hosts import ssh_host
from autotest_lib.site_utils.rpm_control_system import rpm_client

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'


class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    DEFAULT_PORT = 9999

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    # Ready test function
    SERVO_READY_METHOD = 'get_version'

    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'


    def _initialize(self, servo_host='localhost',
                    servo_port=DEFAULT_PORT, servo_board=None,
                    servo_serial=None, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param servo_board: Board that the servo is connected to.
        @param servo_serial: Serial number of the servo; passed on to
                             `servo.Servo` by `connect_servo()`.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        self.servo_port = servo_port
        self.servo_board = servo_board
        self.servo_serial = servo_serial
        self._servo = None
        self._repair_strategy = (
                servo_repair.create_servo_repair_strategy())
        self._is_localhost = (self.hostname == 'localhost')
        if self._is_localhost:
            # A localhost servo is never treated as a lab device,
            # regardless of what the caller passed for `is_in_lab`.
            self._is_in_lab = False
        elif is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab

        # Commands on the servo host must be run by the superuser.
        # Our account on a remote host is root, but if our target is
        # localhost then we might be running unprivileged.  If so,
        # `sudo` will have to be added to the commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False


    def connect_servo(self):
        """Establish a connection to the servod server on this host.

        Initializes `self._servo` and then verifies that all network
        connections are working.  This will create an ssh tunnel if
        it's required.

        As a side effect of testing the connection, all signals on the
        target servo are reset to default values, and the USB stick is
        set to the neutral (off) position.

        @raises AutoservVerifyError if servo initialization does not
                finish within INITIALIZE_SERVO_TIMEOUT_SECS.
        """
        servo_obj = servo.Servo(servo_host=self,
                                servo_serial=self.servo_serial)
        timeout, _ = retry.timeout(
                servo_obj.initialize_dut,
                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
        if timeout:
            raise hosts.AutoservVerifyError(
                    'Servo initialize timed out.')
        # Only cache the servo object once it has initialized successfully.
        self._servo = servo_obj


    def disconnect_servo(self):
        """Disconnect our servo if it exists.

        If we've previously successfully connected to our servo,
        disconnect any established ssh tunnel, and set `self._servo`
        back to `None`.
        """
        if self._servo:
            # N.B. This call is safe even without a tunnel:
            # rpc_server_tracker.disconnect() silently ignores
            # unknown ports.
            self.rpc_server_tracker.disconnect(self.servo_port)
            self._servo = None


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        When ssh tunneling is enabled and the host is remote, the proxy
        is obtained through `rpc_server_tracker`; otherwise a direct
        XML-RPC connection to `hostname:servo_port` is used.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
            return self.rpc_server_tracker.xmlrpc_connect(
                    None, self.servo_port,
                    ready_test_name=self.SERVO_READY_METHOD,
                    timeout_seconds=60)
        else:
            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
            return xmlrpclib.ServerProxy(remote)


    def is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, ssh_failure_retry_ok=False,
            options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, wrapping
        the command in `sudo` when we are not already root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param ssh_failure_retry_ok: when True and ssh connection failure is
                                     suspected, OK to retry command (but not
                                     compulsory, and likely not needed here)
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
                        command)
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the same exception type
                # that the remote (SSH) path documents.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            # N.B. `ssh_failure_retry_ok` is accepted for interface
            # compatibility but is not forwarded to the parent class.
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    def _get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    def get_attached_duts(self, afe):
        """Gather a list of duts that use this servo host.

        @param afe: afe instance.

        @returns list of duts.
        """
        return afe.get_hosts_by_attribute(
                attribute=SERVO_HOST_ATTR, value=self.hostname)


    def get_board(self):
        """Determine the board for this servo host.

        @returns a string representing this servo host's board.
        """
        return lsbrelease_utils.get_current_board(
                lsb_release_content=self.run('cat /etc/lsb-release').stdout)


    def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
        """Choose which dut to schedule servo host reboot job.

        We'll want a semi-deterministic way of selecting which host should be
        scheduled for the servo host reboot job.  For now we'll sort the
        hosts by hostname with the expectation the dut list will stay
        consistent.  From there we'll grab the first dut that is available
        so we don't schedule a job on a dut that will never run.

        The caller's `dut_list` is not modified.

        @param dut_list: Non-empty list of the dut hostnames to choose from.
        @param afe: Instance of the AFE.

        @return hostname of dut to schedule job on.
        """
        # Sort on the hostname rather than on the host objects themselves,
        # so every caller walks the candidates in the same order.
        afe_hosts = sorted(afe.get_hosts(dut_list),
                           key=lambda afe_host: afe_host.hostname)
        for afe_host in afe_hosts:
            if afe_host.status not in host_states.UNAVAILABLE_STATES:
                return afe_host.hostname
        # If they're all unavailable, just return the first sorted dut.
        return sorted(dut_list)[0]


    def _sync_job_scheduled_for_duts(self, dut_list, afe):
        """Checks if a synchronized reboot has been scheduled for these duts.

        Grab all the host queue entries that aren't completed for the duts and
        see if any of them have the expected job name.

        @param dut_list: List of duts to check on.
        @param afe: Instance of the AFE.

        @returns True if the job is scheduled, False otherwise.
        """
        afe_hosts = afe.get_hosts(dut_list)
        for afe_host in afe_hosts:
            hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
            for hqe in hqes:
                job = afe.get_jobs(id=hqe.job.id)
                if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
                                           _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
                    return True
        return False


    def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
        """Schedule a job to reboot the servo host.

        When we schedule a job, it will create a ServoHost object which will
        go through this entire flow of checking if a reboot is needed and
        trying to schedule it.  There is probably a better approach to setting
        up a synchronized reboot but I'm coming up short on better ideas so I
        apologize for this circus show.

        @param dut_list:      List of duts that need to be locked.
        @param afe:           Instance of afe.
        @param force_reboot:  Boolean to indicate if a forced reboot should be
                              scheduled or not.
        """
        # If we've already scheduled job on a dut, we're done here.
        if self._sync_job_scheduled_for_duts(dut_list, afe):
            return

        # Looks like we haven't scheduled a job yet.
        test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
                else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
        dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
        getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
        control_file = getter.get_control_file_contents_by_name(test)
        control_type = control_data.CONTROL_TYPE_NAMES.SERVER
        try:
            afe.create_job(control_file=control_file, name=test,
                           control_type=control_type, hosts=[dut])
        except Exception:
            # Sometimes creating the job will raise an exception. We'll log it
            # but we don't want to fail because of it.
            logging.exception('Scheduling reboot job failed due to Exception.')


    def reboot(self, *args, **dargs):
        """Reboot using special servo host reboot command."""
        super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
                                      *args, **dargs)


    def _check_for_reboot(self, updater):
        """Reboot this servo host if an upgrade is waiting.

        If the host has successfully downloaded and finalized a new
        build, reboot.  When more than one DUT is attached to this servo
        host, no reboot happens here; a synchronized reboot job is
        scheduled instead.

        @param updater: a ChromiumOSUpdater instance for checking
            whether reboot is needed.
        @return Return a (status, build) tuple reflecting the
            update_engine status and current build of the host
            at the end of the call.
        @raises AutoservHostError if the host fails to go down or to
            come back up after the reboot.
        """
        current_build_number = self._get_release_version()
        status = updater.check_update_status()
        if status == autoupdater.UPDATER_NEED_REBOOT:
            # Check if we need to schedule an organized reboot.
            afe = frontend_wrappers.RetryingAFE(
                    timeout_min=5, delay_sec=10,
                    server=server_site_utils.get_global_afe_hostname())
            dut_list = self.get_attached_duts(afe)
            logging.info('servo host has the following duts: %s', dut_list)
            if len(dut_list) > 1:
                logging.info('servo host has multiple duts, scheduling '
                             'synchronized reboot')
                self.schedule_synchronized_reboot(dut_list, afe)
                return status, current_build_number

            logging.info('Rebooting servo host %s from build %s',
                         self.hostname, current_build_number)
            # Tell the reboot() call not to wait for completion.
            # Otherwise, the call will log reboot failure if servo does
            # not come back.  The logged reboot failure will lead to
            # test job failure.  If the test does not require servo, we
            # don't want servo failure to fail the test with error:
            # `Host did not return from reboot` in status.log.
            self.reboot(fastsync=True, wait=False)

            # We told the reboot() call not to wait, but we need to wait
            # for the reboot before we continue.  Alas.  The code from
            # here below is basically a copy of Host.wait_for_restart(),
            # with the logging bits ripped out, so that they can't cause
            # the failure logging problem described above.
            #
            # The black stain that this has left on my soul can never be
            # erased.
            #
            # NOTE(review): the boot id is sampled after the reboot has
            # already been requested; presumably the delay built into
            # REBOOT_CMD keeps the host up long enough - confirm.
            old_boot_id = self.get_boot_id()
            if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                                  warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                                  old_boot_id=old_boot_id):
                raise error.AutoservHostError(
                        'servo host %s failed to shut down.' %
                        self.hostname)
            if self.wait_up(timeout=120):
                current_build_number = self._get_release_version()
                status = updater.check_update_status()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)
        return status, current_build_number


    def update_image(self, wait_for_update=False):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @param wait_for_update If an update needs to be applied and
            this is true, then don't return until the update is
            downloaded and finalized, and the host rebooted.
        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.

        """
        # servod could be running in a Ubuntu workstation.
        if not self.is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        target_build = afe_utils.get_stable_cros_image_name(self.get_board())
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        # For servo image staging, we want it as more widely distributed as
        # possible, so that devservers' load can be evenly distributed. So use
        # hostname instead of target_build as hash.
        ds = dev_server.ImageServer.resolve(self.hostname,
                                            hostname=self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        status, current_build_number = self._check_for_reboot(updater)
        update_pending = True
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif status == autoupdater.UPDATER_NEED_REBOOT:
            # A reboot is still pending (e.g. a synchronized reboot was
            # scheduled); there is nothing more to do in this cycle.
            return
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    # TODO(jrbarnette):  This 'touch' is a gross hack
                    # to get us past crbug.com/613603.  Once that
                    # bug is resolved, we should remove this code.
                    self.run('touch /home/chronos/.oobe_completed')
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    metrics.Counter('chromeos/autotest/servo/'
                                    'rootfs_update_failed').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            update_pending = False

        if update_pending and wait_for_update:
            logging.info('Waiting for servo update to complete.')
            self.run('update_engine_client --follow', ignore_status=True)


    def verify(self, silent=False):
        """Update the servo host and verify it's in a good state.

        On any failure the servo connection is torn down before the
        exception is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        # TODO(jrbarnette) Old versions of beaglebone_servo include
        # the powerd package.  If you touch the .oobe_completed file
        # (as we do to work around an update_engine problem), then
        # powerd will eventually shut down the beaglebone for lack
        # of (apparent) activity.  Current versions of
        # beaglebone_servo don't have powerd, but until we can purge
        # the lab of the old images, we need to make sure powerd
        # isn't running.
        self.run('stop powerd', ignore_status=True)
        try:
            self._repair_strategy.verify(self, silent)
        except:
            # Deliberately bare: clean up on *any* exit, then re-raise.
            self.disconnect_servo()
            raise


    def repair(self, silent=False):
        """Attempt to repair servo host.

        On any failure the servo connection is torn down before the
        exception is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        try:
            self._repair_strategy.repair(self, silent)
        except:
            # Deliberately bare: clean up on *any* exit, then re-raise.
            self.disconnect_servo()
            raise


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises AutoservRepairError if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise hosts.AutoservRepairError(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no servo is currently
                 connected.
        """
        return self._servo


def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The servo name is formed by appending '-servo' to the first
    dot-separated component of the DUT hostname.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    host_parts = dut_hostname.split('.')
    host_parts[0] = host_parts[0] + '-servo'
    return '.'.join(host_parts)


def servo_host_is_up(servo_hostname):
    """Given a servo host name, return if it's up or not.

    @param servo_hostname: hostname of the servo host.

    @return True if it's up, False otherwise
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    ping_config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    return ping_runner.PingRunner().ping(ping_config).received > 0


def _map_afe_board_to_servo_board(afe_board):
    """Map a board we get from the AFE to a servo appropriate value.

    Many boards are identical to other boards for servo's purposes.
    This function makes that mapping.  An explicit entry in the board
    map wins; otherwise the first matching known suffix is stripped.

    @param afe_board string board name received from AFE.
    @return board we expect servo to have.

    """
    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
    BOARD_MAP = {'gizmo': 'panther'}
    mapped_board = afe_board
    if afe_board in BOARD_MAP:
        mapped_board = BOARD_MAP[afe_board]
    else:
        for suffix in KNOWN_SUFFIXES:
            if afe_board.endswith(suffix):
                mapped_board = afe_board[0:-len(suffix)]
                break
    if mapped_board != afe_board:
        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
    return mapped_board


def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    If the stored servo port is not an integer, the error is logged and
    no servo args are returned.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an option port,
            plus an `is_in_lab` flag indicating whether this in the CrOS
            test lab, or some different environment.  `servo_args` is
            `None` when no usable servo description was found.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except (TypeError, ValueError):
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # The port check above may have discarded servo_args; only record
        # the serial number when there is still a dict to put it in.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab


def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and verify or repair a `ServoHost`
    object for a servo connected to the given `dut`, subject to various
    constraints imposed by the parameters:
      * When the `servo_args` parameter is not `None`, a servo
        host must be created, and must be checked with `repair()`.
      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
        true:
          * If `try_servo_repair` is true, then create a servo host and
            check it with `repair()`.
          * Otherwise, if the servo responds to `ping` then create a
            servo host and check it with `verify()`.

    In cases where `servo_args` was not `None`, repair failure
    exceptions are passed back to the caller; otherwise, exceptions
    are logged and then discarded.  Note that this only happens in cases
    where we're called from a test (not special task) control file that
    has an explicit dependency on servo.  In that case, we require that
    repair not write to `status.log`, so as to avoid polluting test
    results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Parameters for a servo host consist of a host name, port number, and
    DUT board, and are determined from one of these sources, in order of
    priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, unrepaired exceptions
                          from `verify()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    servo_dependency = servo_args is not None
    is_in_lab = False
    if dut is not None and (try_lab_servo or servo_dependency):
        servo_args_override, is_in_lab = _get_standard_servo_args(dut)
        if servo_args_override is not None:
            servo_args = servo_args_override
    if servo_args is None:
        return None
    if (not servo_dependency and not try_servo_repair and
            not servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        return None
    newhost = ServoHost(is_in_lab=is_in_lab, **servo_args)
    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if servo_dependency:
        newhost.repair(silent=True)
    else:
        try:
            if try_servo_repair:
                newhost.repair()
            else:
                newhost.verify()
        except Exception:
            operation = 'repair' if try_servo_repair else 'verification'
            logging.exception('Servo %s failed for %s',
                              operation, newhost.hostname)
    return newhost