1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4# 5# Expects to be run in an environment with sudo and no interactive password 6# prompt, such as within the Chromium OS development chroot. 7 8 9"""This file provides core logic for servo verify/repair process.""" 10 11 12import httplib 13import logging 14import socket 15import xmlrpclib 16 17from autotest_lib.client.bin import utils 18from autotest_lib.client.common_lib import control_data 19from autotest_lib.client.common_lib import error 20from autotest_lib.client.common_lib import global_config 21from autotest_lib.client.common_lib import host_states 22from autotest_lib.client.common_lib import hosts 23from autotest_lib.client.common_lib import lsbrelease_utils 24from autotest_lib.client.common_lib.cros import autoupdater 25from autotest_lib.client.common_lib.cros import dev_server 26from autotest_lib.client.common_lib.cros import retry 27from autotest_lib.client.common_lib.cros.network import ping_runner 28from autotest_lib.client.cros import constants as client_constants 29from autotest_lib.server import afe_utils 30from autotest_lib.server import site_utils as server_site_utils 31from autotest_lib.server.cros import dnsname_mangler 32from autotest_lib.server.cros.dynamic_suite import frontend_wrappers 33from autotest_lib.server.cros.dynamic_suite import control_file_getter 34from autotest_lib.server.cros.servo import servo 35from autotest_lib.server.hosts import servo_repair 36from autotest_lib.server.hosts import ssh_host 37from autotest_lib.site_utils.rpm_control_system import rpm_client 38 39try: 40 from chromite.lib import metrics 41except ImportError: 42 metrics = utils.metrics_mock 43 44 45# Names of the host attributes in the database that represent the values for 46# the servo_host and servo_port for a servo connected to the DUT. 
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the jobs used to reboot a servo host that is shared by several
# DUTs; see ServoHost.schedule_synchronized_reboot().
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'


class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    DEFAULT_PORT = 9999

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 60

    # Ready test function
    SERVO_READY_METHOD = 'get_version'

    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'


    def _initialize(self, servo_host='localhost',
                    servo_port=DEFAULT_PORT, servo_board=None,
                    servo_serial=None, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param servo_board: Board that the servo is connected to.
        @param servo_serial: Serial number of the servo; passed on to
                             `servo.Servo` by `connect_servo()`.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        self.servo_port = servo_port
        self.servo_board = servo_board
        self.servo_serial = servo_serial
        self._servo = None
        self._repair_strategy = (
                servo_repair.create_servo_repair_strategy())
        self._is_localhost = (self.hostname == 'localhost')
        if self._is_localhost:
            # localhost is never treated as a lab device, whatever the
            # caller passed for `is_in_lab`.
            self._is_in_lab = False
        elif is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab

        # Commands on the servo host must be run by the superuser.
        # Our account on a remote host is root, but if our target is
        # localhost then we might be running unprivileged.  If so,
        # `sudo` will have to be added to the commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False


    def connect_servo(self):
        """Establish a connection to the servod server on this host.

        Initializes `self._servo` and then verifies that all network
        connections are working.  This will create an ssh tunnel if
        it's required.

        As a side effect of testing the connection, all signals on the
        target servo are reset to default values, and the USB stick is
        set to the neutral (off) position.

        @raises AutoservVerifyError if servo initialization does not
                finish within INITIALIZE_SERVO_TIMEOUT_SECS.
        """
        servo_obj = servo.Servo(servo_host=self,
                                servo_serial=self.servo_serial)
        timeout, _ = retry.timeout(
                servo_obj.initialize_dut,
                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
        if timeout:
            raise hosts.AutoservVerifyError(
                    'Servo initialize timed out.')
        # Only cache the servo object once it initialized successfully.
        self._servo = servo_obj


    def disconnect_servo(self):
        """Disconnect our servo if it exists.

        If we've previously successfully connected to our servo,
        disconnect any established ssh tunnel, and set `self._servo`
        back to `None`.
        """
        if self._servo:
            # N.B. This call is safe even without a tunnel:
            # rpc_server_tracker.disconnect() silently ignores
            # unknown ports.
            self.rpc_server_tracker.disconnect(self.servo_port)
            self._servo = None


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        When ENABLE_SSH_TUNNEL_FOR_SERVO is set and the servo host is not
        localhost, the proxy is obtained from `rpc_server_tracker`;
        otherwise a plain HTTP proxy to hostname:servo_port is created.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
            return self.rpc_server_tracker.xmlrpc_connect(
                    None, self.servo_port,
                    ready_test_name=self.SERVO_READY_METHOD,
                    timeout_seconds=60)
        else:
            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
            return xmlrpclib.ServerProxy(remote)


    def is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            # Unable to run the check at all: report "not enough
            # information" as False.
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None,
                         alive_count_max=None, connection_attempts=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.
        @param alive_count_max Ignored.
        @param connection_attempts Ignored.

        @returns: An ssh command with the requested settings.

        """
        options = ' '.join([opts, '-o Protocol=2'])
        # The "Ignored" parameters are replaced by the tuned values below.
        return super(ServoHost, self).make_ssh_command(
                user=user, port=port, opts=options, hosts_file='/dev/null',
                connect_timeout=30, alive_interval=180, alive_count_max=3,
                connection_attempts=4)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, ssh_failure_retry_ok=False,
            options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo` when we are not already the superuser.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param ssh_failure_retry_ok: when True and ssh connection failure is
                                     suspected, OK to retry command (but not
                                     compulsory, and likely not needed here)
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        # N.B. `ssh_failure_retry_ok` is accepted for interface
        # compatibility but is not forwarded on either path.
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
                        command)
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # callers of a remote run() expect.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    def _get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    def get_attached_duts(self, afe):
        """Gather a list of duts that use this servo host.

        @param afe: afe instance.

        @returns list of duts.
        """
        return afe.get_hosts_by_attribute(
                attribute=SERVO_HOST_ATTR, value=self.hostname)


    def get_board(self):
        """Determine the board for this servo host.

        @returns a string representing this servo host's board.
        """
        return lsbrelease_utils.get_current_board(
                lsb_release_content=self.run('cat /etc/lsb-release').stdout)


    def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
        """Choose which dut to schedule servo host reboot job.

        We'll want a semi-deterministic way of selecting which host should be
        scheduled for the servo host reboot job.  For now we'll sort the
        list with the expectation the dut list will stay consistent.
        From there we'll grab the first dut that is available so we
        don't schedule a job on a dut that will never run.

        The caller's `dut_list` is not modified.

        @param dut_list:  List of the dut hostnames to choose from.
        @param afe:       Instance of the AFE.

        @return hostname of dut to schedule job on.
        """
        for afe_host in sorted(afe.get_hosts(dut_list)):
            if afe_host.status not in host_states.UNAVAILABLE_STATES:
                return afe_host.hostname
        # If they're all unavailable, just return the first sorted dut.
        # Sort a copy rather than the caller's list.
        return sorted(dut_list)[0]


    def _sync_job_scheduled_for_duts(self, dut_list, afe):
        """Checks if a synchronized reboot has been scheduled for these duts.

        Grab all the host queue entries that aren't completed for the duts and
        see if any of them have the expected job name.

        @param dut_list:  List of duts to check on.
        @param afe:       Instance of the AFE.

        @returns True if the job is scheduled, False otherwise.
        """
        reboot_job_names = (_SERVO_HOST_REBOOT_TEST_NAME,
                            _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
        for afe_host in afe.get_hosts(dut_list):
            hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
            for hqe in hqes:
                job = afe.get_jobs(id=hqe.job.id)
                if job and job[0].name in reboot_job_names:
                    return True
        return False


    def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
        """Schedule a job to reboot the servo host.

        When we schedule a job, it will create a ServoHost object which will
        go through this entire flow of checking if a reboot is needed and
        trying to schedule it.  There is probably a better approach to setting
        up a synchronized reboot but I'm coming up short on better ideas so I
        apologize for this circus show.

        @param dut_list:      List of duts that need to be locked.
        @param afe:           Instance of afe.
        @param force_reboot:  Boolean to indicate if a forced reboot should be
                              scheduled or not.
        """
        # If we've already scheduled job on a dut, we're done here.
        if self._sync_job_scheduled_for_duts(dut_list, afe):
            return

        # Looks like we haven't scheduled a job yet.
        test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
                else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
        dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
        getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
        control_file = getter.get_control_file_contents_by_name(test)
        control_type = control_data.CONTROL_TYPE_NAMES.SERVER
        try:
            afe.create_job(control_file=control_file, name=test,
                           control_type=control_type, hosts=[dut])
        except Exception:
            # Sometimes creating the job will raise an exception. We'll log it
            # but we don't want to fail because of it.
            logging.exception('Scheduling reboot job failed due to Exception.')


    def reboot(self, *args, **dargs):
        """Reboot using special servo host reboot command."""
        super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
                                      *args, **dargs)


    def _check_for_reboot(self, updater):
        """Reboot this servo host if an upgrade is waiting.

        If the host has successfully downloaded and finalized a new
        build, reboot.  If more than one DUT is attached to this servo
        host, a synchronized reboot job is scheduled instead and the
        host is not rebooted by this call.

        @param updater: a ChromiumOSUpdater instance for checking
            whether reboot is needed.
        @return Return a (status, build) tuple reflecting the
            update_engine status and current build of the host
            at the end of the call.
        @raises AutoservHostError if the host fails to shut down or
            fails to come back from the reboot.
        """
        current_build_number = self._get_release_version()
        status = updater.check_update_status()
        if status == autoupdater.UPDATER_NEED_REBOOT:
            # Check if we need to schedule an organized reboot.
            afe = frontend_wrappers.RetryingAFE(
                    timeout_min=5, delay_sec=10,
                    server=server_site_utils.get_global_afe_hostname())
            dut_list = self.get_attached_duts(afe)
            logging.info('servo host has the following duts: %s', dut_list)
            if len(dut_list) > 1:
                logging.info('servo host has multiple duts, scheduling '
                             'synchronized reboot')
                self.schedule_synchronized_reboot(dut_list, afe)
                return status, current_build_number

            logging.info('Rebooting servo host %s from build %s',
                         self.hostname, current_build_number)
            # Tell the reboot() call not to wait for completion.
            # Otherwise, the call will log reboot failure if servo does
            # not come back.  The logged reboot failure will lead to
            # test job failure.  If the test does not require servo, we
            # don't want servo failure to fail the test with error:
            # `Host did not return from reboot` in status.log.
            self.reboot(fastsync=True, wait=False)

            # We told the reboot() call not to wait, but we need to wait
            # for the reboot before we continue.  Alas.  The code from
            # here below is basically a copy of Host.wait_for_restart(),
            # with the logging bits ripped out, so that they can't cause
            # the failure logging problem described above.
            #
            # The black stain that this has left on my soul can never be
            # erased.
            old_boot_id = self.get_boot_id()
            if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                                  warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                                  old_boot_id=old_boot_id):
                raise error.AutoservHostError(
                        'servo host %s failed to shut down.' %
                        self.hostname)
            if self.wait_up(timeout=120):
                # Refresh both values so the caller sees the post-reboot
                # state.
                current_build_number = self._get_release_version()
                status = updater.check_update_status()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)
        return status, current_build_number


    def update_image(self, wait_for_update=False):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @param wait_for_update If an update needs to be applied and
            this is true, then don't return until the update is
            downloaded and finalized, and the host rebooted.
        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.

        """
        # servod could be running in a Ubuntu workstation.
        if not self.is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        target_build = afe_utils.get_stable_cros_image_name(self.get_board())
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        # For servo image staging, we want it as more widely distributed as
        # possible, so that devservers' load can be evenly distributed. So use
        # hostname instead of target_build as hash.
        ds = dev_server.ImageServer.resolve(self.hostname,
                                            hostname=self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        status, current_build_number = self._check_for_reboot(updater)
        # Assume an update is in flight unless we find out below that the
        # host is already on the target build.
        update_pending = True
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif status == autoupdater.UPDATER_NEED_REBOOT:
            # _check_for_reboot() left the reboot pending (a synchronized
            # reboot was scheduled); nothing more to do in this cycle.
            return
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Best effort: a staging failure only skips this cycle.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    # TODO(jrbarnette):  This 'touch' is a gross hack
                    # to get us past crbug.com/613603.  Once that
                    # bug is resolved, we should remove this code.
                    self.run('touch /home/chronos/.oobe_completed')
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    metrics.Counter('chromeos/autotest/servo/'
                                    'rootfs_update_failed').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            update_pending = False

        if update_pending and wait_for_update:
            logging.info('Waiting for servo update to complete.')
            self.run('update_engine_client --follow', ignore_status=True)


    def verify(self, silent=False):
        """Update the servo host and verify it's in a good state.

        If verification raises, the servo is disconnected before the
        exception is propagated.

        @param silent   If true, suppress logging in `status.log`.
        """
        # TODO(jrbarnette) Old versions of beaglebone_servo include
        # the powerd package.  If you touch the .oobe_completed file
        # (as we do to work around an update_engine problem), then
        # powerd will eventually shut down the beaglebone for lack
        # of (apparent) activity.  Current versions of
        # beaglebone_servo don't have powerd, but until we can purge
        # the lab of the old images, we need to make sure powerd
        # isn't running.
        self.run('stop powerd', ignore_status=True)
        try:
            self._repair_strategy.verify(self, silent)
        except:
            # Deliberately bare: clean up on any failure, then re-raise
            # the original exception unchanged.
            self.disconnect_servo()
            raise


    def repair(self, silent=False):
        """Attempt to repair servo host.

        If repair raises, the servo is disconnected before the exception
        is propagated.

        @param silent   If true, suppress logging in `status.log`.
        """
        try:
            self._repair_strategy.repair(self, silent)
        except:
            # Deliberately bare: clean up on any failure, then re-raise
            # the original exception unchanged.
            self.disconnect_servo()
            raise


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises AutoservRepairError if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise hosts.AutoservRepairError(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if `connect_servo()` has
                 not succeeded (or the servo has been disconnected).
        """
        return self._servo


def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The servo name is formed by appending '-servo' to the first
    dot-separated component of the DUT hostname.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    host_parts = dut_hostname.split('.')
    host_parts[0] = host_parts[0] + '-servo'
    return '.'.join(host_parts)


def servo_host_is_up(servo_hostname):
    """Given a servo host name, return if it's up or not.

    @param servo_hostname: hostname of the servo host.

    @return True if it's up, False otherwise
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    ping_config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    return ping_runner.PingRunner().ping(ping_config).received > 0


def _map_afe_board_to_servo_board(afe_board):
    """Map a board we get from the AFE to a servo appropriate value.

    Many boards are identical to other boards for servo's purposes.
    This function makes that mapping.  An explicit entry in the board
    map takes precedence; otherwise the first matching known suffix is
    stripped from the name.

    @param afe_board string board name received from AFE.
    @return board we expect servo to have.

    """
    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
    BOARD_MAP = {'gizmo': 'panther'}
    mapped_board = afe_board
    if afe_board in BOARD_MAP:
        mapped_board = BOARD_MAP[afe_board]
    else:
        for suffix in KNOWN_SUFFIXES:
            if afe_board.endswith(suffix):
                mapped_board = afe_board[0:-len(suffix)]
                break
    if mapped_board != afe_board:
        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
    return mapped_board


def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    If the stored servo port cannot be converted to an int, the error is
    logged and no servo args are returned.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an option port,
            plus an `is_in_lab` flag indicating whether this in the CrOS
            test lab, or some different environment.  `servo_args` is
            None when no usable servo data was found.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            # Inside an SSP container on moblab, use the configured host
            # container IP in place of a loopback servo host.
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except (TypeError, ValueError):
                # int() raises TypeError for None and ValueError for a
                # malformed string; both mean the port is unusable.
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # servo_args may have just been reset to None because of a bad
        # port; don't try to store the serial number in that case.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab


def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and verify or repair a `ServoHost`
    object for a servo connected to the given `dut`, subject to various
    constraints imposed by the parameters:
      * When the `servo_args` parameter is not `None`, a servo
        host must be created, and must be checked with `repair()`.
      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
        true:
          * If `try_servo_repair` is true, then create a servo host and
            check it with `repair()`.
          * Otherwise, if the servo responds to `ping` then create a
            servo host and check it with `verify()`.

    In cases where `servo_args` was not `None`, repair failure
    exceptions are passed back to the caller; otherwise, exceptions
    are logged and then discarded.  Note that this only happens in cases
    where we're called from a test (not special task) control file that
    has an explicit dependency on servo.  In that case, we require that
    repair not write to `status.log`, so as to avoid polluting test
    results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Parameters for a servo host consist of a host name, port number, and
    DUT board, and are determined from one of these sources, in order of
    priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, unrepaired exceptions
                          from `verify()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    # Remember whether the caller supplied args before they are possibly
    # overridden by data from the DUT below.
    servo_dependency = servo_args is not None
    is_in_lab = False
    if dut is not None and (try_lab_servo or servo_dependency):
        servo_args_override, is_in_lab = _get_standard_servo_args(dut)
        if servo_args_override is not None:
            servo_args = servo_args_override
    if servo_args is None:
        return None
    if (not servo_dependency and not try_servo_repair and
            not servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        return None
    newhost = ServoHost(is_in_lab=is_in_lab, **servo_args)
    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if servo_dependency:
        newhost.repair(silent=True)
    else:
        try:
            if try_servo_repair:
                newhost.repair()
            else:
                newhost.verify()
        except Exception:
            operation = 'repair' if try_servo_repair else 'verification'
            logging.exception('Servo %s failed for %s',
                              operation, newhost.hostname)
    return newhost