# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""


import logging
import os
import time
import traceback
import xmlrpclib

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import servo_repair
from autotest_lib.server.hosts import base_servohost


# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
# Model is inferred from host labels.
SERVO_MODEL_ATTR = 'servo_model'
SERVO_SERIAL_ATTR = 'servo_serial'
SERVO_ATTR_KEYS = (
        SERVO_BOARD_ATTR,
        SERVO_HOST_ATTR,
        SERVO_PORT_ATTR,
        SERVO_SERIAL_ATTR,
)

# Timeout value for stop/start servod process.
SERVOD_TEARDOWN_TIMEOUT = 3
SERVOD_QUICK_STARTUP_TIMEOUT = 20
SERVOD_STARTUP_TIMEOUT = 60

_CONFIG = global_config.global_config
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

SERVO_STATE_LABEL_PREFIX = 'servo_state'
SERVO_STATE_WORKING = 'WORKING'
SERVO_STATE_BROKEN = 'BROKEN'


class ServoHost(base_servohost.BaseServoHost):
    """Host class for a servo host (e.g. beaglebone, labstation) together
    with the servo instance served on a specific port.

    @type _servo: servo.Servo | None
    """

    DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 60

    # Ready test function
    SERVO_READY_METHOD = 'get_version'

    def _init_attributes(self):
        """Reset every servo-related attribute to its unset value."""
        self._servo_state = None
        self.servo_port = None
        self.servo_board = None
        self.servo_model = None
        self.servo_serial = None
        self._servo = None
        self._servod_server_proxy = None


    def _initialize(self, servo_host='localhost',
                    servo_port=DEFAULT_PORT, servo_board=None,
                    servo_model=None, servo_serial=None, is_in_lab=None,
                    *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on. Defaults
                           to the SERVOD_PORT environment variable if set,
                           otherwise 9999.
        @param servo_board: Board that the servo is connected to.
        @param servo_model: Model that the servo is connected to.
        @param servo_serial: Serial of the servo; handed to `servo.Servo`
                             and passed as SERIAL= when starting servod.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           is_in_lab=is_in_lab, *args, **dargs)
        self._init_attributes()
        self.servo_port = int(servo_port)
        self.servo_board = servo_board
        self.servo_model = servo_model
        self.servo_serial = servo_serial

        # Path of the servo host lock file.
        self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
                           + self.LOCK_FILE_POSTFIX)
        # File path to declare a reboot request.
        self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
                             + self.REBOOT_FILE_POSTFIX)

        # Lock the servo host if it's an in-lab labstation to prevent other
        # tasks from rebooting it until the current task completes. We also
        # wait here to make sure the labstation is up, in case it is in the
        # middle of a reboot.
        self._is_locked = False
        if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
                and self.is_labstation()):
            self._lock()

        self._repair_strategy = (
                servo_repair.create_servo_repair_strategy())

    def connect_servo(self):
        """Establish a connection to the servod server on this host.

        Initializes `self._servo` and then verifies that all network
        connections are working.  This will create an ssh tunnel if
        it's required.

        As a side effect of testing the connection, all signals on the
        target servo are reset to default values, and the USB stick is
        set to the neutral (off) position.

        @raises hosts.AutoservVerifyError if initializing the servo does
                not finish within INITIALIZE_SERVO_TIMEOUT_SECS.
        """
        servo_obj = servo.Servo(servo_host=self,
                                servo_serial=self.servo_serial)
        self._servo = servo_obj
        timeout, _ = retry.timeout(
                servo_obj.initialize_dut,
                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
        if timeout:
            raise hosts.AutoservVerifyError(
                    'Servo initialize timed out.')


    def disconnect_servo(self):
        """Disconnect our servo if it exists.

        If we've previously successfully connected to our servo,
        disconnect any established ssh tunnel, drop the cached servod
        proxy, and set `self._servo` back to `None`.
        """
        if self._servo:
            # N.B. This call is safe even without a tunnel:
            # rpc_server_tracker.disconnect() silently ignores
            # unknown ports.
            self.rpc_server_tracker.disconnect(self.servo_port)
            # The cached proxy may have been created through the tunnel
            # that was just torn down; forget it so that the next
            # get_servod_server_proxy() call builds a fresh one instead
            # of handing out a proxy bound to a dead connection.
            self._servod_server_proxy = None
            self._servo = None


    def _create_servod_server_proxy(self):
        """Create a proxy that can be used to communicate with servod server.

        When ssh tunnelling is enabled and the host is not localhost the
        proxy is obtained from `rpc_server_tracker`; otherwise a direct
        `xmlrpclib.ServerProxy` to hostname:port is returned.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
            return self.rpc_server_tracker.xmlrpc_connect(
                    None, self.servo_port,
                    ready_test_name=self.SERVO_READY_METHOD,
                    timeout_seconds=60,
                    request_timeout_seconds=3600)
        else:
            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
            return xmlrpclib.ServerProxy(remote)


    def get_servod_server_proxy(self):
        """Return a cached proxy if exists; otherwise, create a new one.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        # Single-threaded execution, no race
        if self._servod_server_proxy is None:
            self._servod_server_proxy = self._create_servod_server_proxy()
        return self._servod_server_proxy


    def verify(self, silent=False):
        """Update the servo host and verify it's in a good state.

        On success the servo state is recorded as WORKING.  On any failure
        the state is recorded as BROKEN, the servo is disconnected, servod
        is stopped, and the original exception is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        message = 'Beginning verify for servo host %s port %s serial %s'
        message %= (self.hostname, self.servo_port, self.servo_serial)
        self.record('INFO', None, None, message)
        try:
            self._repair_strategy.verify(self, silent)
            self._servo_state = SERVO_STATE_WORKING
            self.record('INFO', None, None,
                        'ServoHost verify set servo_state as WORKING')
        except:
            # Bare `except` on purpose: the teardown below runs for every
            # kind of failure, and the error is always re-raised.
            self._servo_state = SERVO_STATE_BROKEN
            self.record('INFO', None, None,
                        'ServoHost verify set servo_state as BROKEN')
            self.disconnect_servo()
            self.stop_servod()
            raise


    def repair(self, silent=False):
        """Attempt to repair servo host.

        On success the servo state is recorded as WORKING and, for a
        labstation, any reboot request created by this servo is withdrawn.
        On any failure the state is recorded as BROKEN, the servo is
        disconnected, servod is stopped, and the original exception is
        re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        message = 'Beginning repair for servo host %s port %s serial %s'
        message %= (self.hostname, self.servo_port, self.servo_serial)
        self.record('INFO', None, None, message)
        try:
            self._repair_strategy.repair(self, silent)
            self._servo_state = SERVO_STATE_WORKING
            self.record('INFO', None, None,
                        'ServoHost repair set servo_state as WORKING')
            # If target is a labstation then try to withdraw any existing
            # reboot request created by this servo because it passed repair.
            if self.is_labstation():
                self.withdraw_reboot_request()
        except:
            # Bare `except` on purpose: the teardown below runs for every
            # kind of failure, and the error is always re-raised.
            self._servo_state = SERVO_STATE_BROKEN
            self.record('INFO', None, None,
                        'ServoHost repair set servo_state as BROKEN')
            self.disconnect_servo()
            self.stop_servod()
            raise


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no servo is connected.
        @rtype: autotest_lib.server.cros.servo.servo.Servo
        """
        return self._servo


    def request_reboot(self):
        """Request the servohost to be rebooted when it is safe to do so,
        by touching the reboot flag file.
        """
        logging.debug('Request to reboot servohost %s has been created by '
                      'servo with port # %s', self.hostname, self.servo_port)
        self.run('touch %s' % self._reboot_file, ignore_status=True)


    def withdraw_reboot_request(self):
        """Withdraw a servohost reboot request, if one exists, by removing
        the reboot flag file.
        """
        logging.debug('Withdrawing request to reboot servohost %s that created'
                      ' by servo with port # %s if exists.',
                      self.hostname, self.servo_port)
        self.run('rm -f %s' % self._reboot_file, ignore_status=True)


    def start_servod(self, quick_startup=False):
        """Start the servod process on servohost.

        @param quick_startup: If true, wait SERVOD_QUICK_STARTUP_TIMEOUT
                              seconds after starting servod instead of
                              SERVOD_STARTUP_TIMEOUT.
        """
        # Skip if running on the localhost.(crbug.com/1038168)
        if self.is_localhost():
            logging.debug("Servohost is a localhost, skipping start servod.")
            return

        cmd = 'start servod'
        if self.servo_board:
            cmd += ' BOARD=%s' % self.servo_board
            # MODEL is only passed along together with a known BOARD.
            if self.servo_model:
                cmd += ' MODEL=%s' % self.servo_model
        else:
            logging.warning('Board for DUT is unknown; starting servod'
                            ' assuming a pre-configured board.')

        cmd += ' PORT=%d' % self.servo_port
        if self.servo_serial:
            cmd += ' SERIAL=%s' % self.servo_serial
        self.run(cmd, timeout=60)

        # There's a lag between when `start servod` completes and when
        # the _ServodConnectionVerifier trigger can actually succeed.
        # The call to time.sleep() below gives time to make sure that
        # the trigger won't fail after we return.

        # Normally servod on servo_v3 and labstation takes ~10 seconds to be
        # ready, but in the rare case where all servos on a labstation are in
        # heavy use it may take ~30 seconds. The timeout values double these
        # numbers: the quick start-up is used when a servohost is first
        # initialized, and the standard start-up timeout is used in repair.
        if quick_startup:
            timeout = SERVOD_QUICK_STARTUP_TIMEOUT
        else:
            timeout = SERVOD_STARTUP_TIMEOUT
        logging.debug('Wait %s seconds for servod process fully up.', timeout)
        time.sleep(timeout)


    def stop_servod(self):
        """Stop the servod process on servohost.

        The stop command's exit status is ignored; afterwards this waits
        SERVOD_TEARDOWN_TIMEOUT seconds for the process to go away.
        """
        # Skip if running on the localhost.(crbug.com/1038168)
        if self.is_localhost():
            logging.debug("Servohost is a localhost, skipping stop servod.")
            return

        logging.debug('Stopping servod on port %s', self.servo_port)
        self.run('stop servod PORT=%d' % self.servo_port,
                 timeout=60, ignore_status=True)
        logging.debug('Wait %s seconds for servod process fully teardown.',
                      SERVOD_TEARDOWN_TIMEOUT)
        time.sleep(SERVOD_TEARDOWN_TIMEOUT)


    def restart_servod(self, quick_startup=False):
        """Restart the servod process on servohost.

        @param quick_startup: Forwarded to `start_servod()`.
        """
        self.stop_servod()
        self.start_servod(quick_startup)


    def _lock(self):
        """Lock the servohost by touching the lock file."""
        logging.debug('Locking servohost %s by touching %s file',
                      self.hostname, self._lock_file)
        self.run('touch %s' % self._lock_file, ignore_status=True)
        self._is_locked = True


    def _unlock(self):
        """Unlock the servohost by removing the lock file."""
        logging.debug('Unlocking servohost by removing %s file',
                      self._lock_file)
        self.run('rm %s' % self._lock_file, ignore_status=True)
        self._is_locked = False


    def close(self):
        """Close the associated servo and the host object.

        Closes the servo (if connected), removes the servohost lock (if
        held) and stops servod.  Failures to reach the servohost while
        unlocking or stopping servod are logged and forgiven.  The base
        class `close()` always runs, even if an earlier step raises.
        """
        try:
            if self._servo:
                # In some cases when we run as lab-tools, the job object is
                # None.
                if self.job and not self._servo.uart_logs_dir:
                    self._servo.uart_logs_dir = self.job.resultdir
                self._servo.close()

            if self._is_locked:
                # Remove the lock if the servohost has been locked.
                try:
                    self._unlock()
                except error.AutoservSSHTimeout:
                    logging.error('Unlock servohost failed due to ssh timeout.'
                                  ' It may caused by servohost went down during'
                                  ' the task.')

            # We always want to stop servod after a task to minimize the
            # chance of a bad servod process interfering with other servods.
            # (see crbug.com/1028665)
            try:
                self.stop_servod()
            except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
                # An ssh timeout is forgiven as well: if the servohost went
                # down during the task there is nothing left to stop, and it
                # must not prevent the host object from being closed.
                logging.info("Failed to stop servod due to:\n%s\n"
                             "This error is forgived.", str(e))
        finally:
            super(ServoHost, self).close()


    def get_servo_state(self):
        """Return the servo state recorded by the last verify/repair.

        @return SERVO_STATE_WORKING or SERVO_STATE_BROKEN; BROKEN is also
                returned when neither verify() nor repair() has recorded
                a state yet.
        """
        if self._servo_state is None:
            return SERVO_STATE_BROKEN
        return self._servo_state


def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The servo hostname is the DUT hostname with '-servo' appended to its
    first dot-separated component.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    host_parts = dut_hostname.split('.')
    host_parts[0] = host_parts[0] + '-servo'
    return '.'.join(host_parts)


def servo_host_is_up(servo_hostname):
    """Given a servo host name, return if it's up or not.

    @param servo_hostname: hostname of the servo host.

    @return True if it's up, False otherwise
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    ping_config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    return ping_runner.PingRunner().ping(ping_config).received > 0


def _map_afe_board_to_servo_board(afe_board):
    """Map a board we get from the AFE to a servo appropriate value.

    Many boards are identical to other boards for servo's purposes.
    This function makes that mapping: an explicit entry in BOARD_MAP wins,
    otherwise the first matching known suffix is stripped.

    @param afe_board string board name received from AFE.
    @return board we expect servo to have.

    """
    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
    BOARD_MAP = {'gizmo': 'panther'}
    mapped_board = afe_board
    if afe_board in BOARD_MAP:
        mapped_board = BOARD_MAP[afe_board]
    else:
        for suffix in KNOWN_SUFFIXES:
            if afe_board.endswith(suffix):
                mapped_board = afe_board[0:-len(suffix)]
                break
    if mapped_board != afe_board:
        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
    return mapped_board


def get_servo_args_for_host(dut_host):
    """Return servo data associated with a given DUT.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return `servo_args` dict with host and an optional port, or None
            when the host has no servo host attribute or its servo port
            attribute is not an integer.
    """
    info = dut_host.host_info_store.get()
    servo_args = {k: v for k, v in info.attributes.iteritems()
                  if k in SERVO_ATTR_KEYS}

    if SERVO_PORT_ATTR in servo_args:
        try:
            servo_args[SERVO_PORT_ATTR] = int(servo_args[SERVO_PORT_ATTR])
        except ValueError:
            logging.error('servo port is not an int: %s',
                          servo_args[SERVO_PORT_ATTR])
            # Drop the servo host so that None is returned below; we don't
            # want to use an invalid port.
            servo_args.pop(SERVO_HOST_ATTR, None)

    if info.board:
        servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                info.board)
    if info.model:
        servo_args[SERVO_MODEL_ATTR] = info.model
    return servo_args if SERVO_HOST_ATTR in servo_args else None


def _tweak_args_for_ssp_moblab(servo_args):
    """Point a local servo host at the SSP host container IP.

    If the servo host in `servo_args` is 'localhost' or '127.0.0.1' it is
    replaced, in place, with the `host_container_ip` value from the `SSP`
    config section (None when that value is not configured).

    @param servo_args  Servo args dict containing SERVO_HOST_ATTR.
    """
    if servo_args[SERVO_HOST_ATTR] in ['localhost', '127.0.0.1']:
        servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
                'SSP', 'host_container_ip', type=str, default=None)


def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False, dut_host_info=None):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and verify or repair a `ServoHost`
    object for a servo connected to the given `dut`, subject to various
    constraints imposed by the parameters:
      * When the `servo_args` parameter is not `None`, a servo
        host must be created, and must be checked with `repair()`.
      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
        true:
          * If `try_servo_repair` is true, then create a servo host and
            check it with `repair()`.
          * Otherwise, if the servo responds to `ping` then create a
            servo host and check it with `verify()`.

    In cases where `servo_args` was not `None`, repair failure
    exceptions are passed back to the caller; otherwise, exceptions
    are logged and then discarded.  Note that this only happens in cases
    where we're called from a test (not special task) control file that
    has an explicit dependency on servo.  In that case, we require that
    repair not write to `status.log`, so as to avoid polluting test
    results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Parameters for a servo host consist of a host name, port number, and
    DUT board, and are determined from one of these sources, in order of
    priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    @param dut                An instance of `Host` from which to take
                              servo parameters (if available).
    @param servo_args         A dictionary with servo parameters to use if
                              they can't be found from `dut`.  If this
                              argument is supplied, unrepaired exceptions
                              from `repair()` will be passed back to the
                              caller.
    @param try_lab_servo      If not true, servo host creation will be
                              skipped unless otherwise required by the
                              caller.
    @param try_servo_repair   If true, check a servo host with
                              `repair()` instead of `verify()`.
    @param dut_host_info      If not None, handed to the new servo host
                              via `set_dut_host_info()`; a failure there
                              is logged and otherwise ignored.

    @returns: A ServoHost object or None. See comments above.

    """
    servo_dependency = servo_args is not None
    if dut is not None and (try_lab_servo or servo_dependency):
        servo_args_override = get_servo_args_for_host(dut)
        if servo_args_override is not None:
            if utils.in_moblab_ssp():
                _tweak_args_for_ssp_moblab(servo_args_override)
            logging.debug(
                    'Overriding provided servo_args (%s) with arguments'
                    ' determined from the host (%s)',
                    servo_args,
                    servo_args_override,
            )
            servo_args = servo_args_override

    if servo_args is None:
        logging.debug('No servo_args provided, and failed to find overrides.')
        return None
    if SERVO_HOST_ATTR not in servo_args:
        logging.debug('%s attribute missing from servo_args: %s',
                      SERVO_HOST_ATTR, servo_args)
        return None
    if (not servo_dependency and not try_servo_repair and
            not servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        logging.debug('ServoHost is not up.')
        return None

    newhost = ServoHost(**servo_args)
    # A failed restart is tolerated here because the verify/repair step
    # below gets another chance to bring servod up.
    try:
        newhost.restart_servod(quick_startup=True)
    except error.AutoservSSHTimeout:
        logging.warning("Restart servod failed due ssh connection "
                        "to servohost timed out. This error is forgiven"
                        " here, we will retry in servo repair process.")
    except error.AutoservRunError as e:
        logging.warning("Restart servod failed due to:\n%s\n"
                        "This error is forgiven here, we will retry"
                        " in servo repair process.", str(e))

    # TODO(gregorynisbet): Clean all of this up.
    logging.debug('create_servo_host: attempt to set info store on '
                  'servo host')
    try:
        if dut_host_info is None:
            logging.debug('create_servo_host: dut_host_info is '
                          'None, skipping')
        else:
            newhost.set_dut_host_info(dut_host_info)
            logging.debug('create_servo_host: successfully set info '
                          'store')
    except Exception:
        logging.error("create_servo_host: (%s)", traceback.format_exc())

    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if servo_dependency:
        # Explicit servo dependency: failures propagate to the caller.
        newhost.repair(silent=True)
        return newhost

    if try_servo_repair:
        try:
            newhost.repair()
        except Exception:
            logging.exception('servo repair failed for %s', newhost.hostname)
    else:
        try:
            newhost.verify()
        except Exception:
            logging.exception('servo verify failed for %s', newhost.hostname)
    return newhost