# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import functools
import logging

import common
from autotest_lib.client.common_lib import hosts
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import repair_utils


def ignore_exception_for_non_cros_host(func):
    """
    Decorator to ignore ControlUnavailableError if servo host is not cros host.
    When using test_that command on a workstation, this enables usage of
    additional servo devices such as servo micro and Sweetberry. This shall not
    change any lab behavior.

    @param func  Method taking `(self, host)` to be wrapped.

    @return The wrapped method.  It returns whatever `func` returns, or
            `None` when a ControlUnavailableError was ignored.
    """
    @functools.wraps(func)
    def wrapper(self, host):
        """
        Wrapper around func.
        """
        try:
            return func(self, host)
        except servo.ControlUnavailableError as e:
            # On a cros host a missing control is a real failure, so the
            # exception is re-raised untouched.
            if host.is_cros_host():
                raise
            logging.warning("Servo host is not cros host, ignore %s: %s",
                            type(e).__name__, e)
    return wrapper


class _UpdateVerifier(hosts.Verifier):
    """
    Verifier to trigger a servo host update, if necessary.

    The operation doesn't wait for the update to complete and is
    considered a success whether or not the servo is currently
    up-to-date.
    """

    def verify(self, host):
        """
        Kick off a servo host image update without waiting for it.

        Any exception raised while triggering the update is logged and
        swallowed, so this verifier never fails.

        @param host  Servo host to be updated.
        """
        # First, only run this verifier if the host is in the physical lab.
        # Secondly, skip if the test is being run by test_that, because subnet
        # restrictions can cause the update to fail.
        try:
            if host.is_in_lab() and host.job and host.job.in_lab:
                host.update_image(wait_for_update=False)
        # We don't want failure from update block DUT repair action.
        # See crbug.com/1029950.
        except Exception as e:
            logging.error('Failed to update servohost image: %s', e)

    @property
    def description(self):
        return 'servo host software is up-to-date'


class _ConfigVerifier(hosts.Verifier):
    """
    Base verifier for the servo config file verifiers.
    """

    CONFIG_FILE = '/var/lib/servod/config'
    ATTR = ''

    @staticmethod
    def _get_config_val(host, config_file, attr):
        """
        Get the `attr` for `host` from `config_file`.

        @param host         Host to be checked for `config_file`.
        @param config_file  Path to the config file to be tested.
        @param attr         Attribute to get from config file.

        @return The attr val as set in the config file, or `None` if
                the file was absent.
        """
        getboard = ('CONFIG=%s ; [ -f $CONFIG ] && '
                    '. $CONFIG && echo $%s' % (config_file, attr))
        attr_val = host.run(getboard, ignore_status=True).stdout
        return attr_val.strip('\n') if attr_val else None

    @staticmethod
    def _validate_attr(host, val, expected_val, attr, config_file):
        """
        Check that the attr setting is valid for the host.

        This presupposes that a valid config file was found.  Raise an
        exception if:
          * There was no attr setting from the file (i.e. the setting
            is an empty string), or
          * The attr setting is valid, the attr is known,
            and the setting doesn't match the DUT.

        @param host         Host to be checked for `config_file`.
        @param val          Value to be tested.
        @param expected_val Expected value.
        @param attr         Attribute we're validating.
        @param config_file  Path to the config file to be tested.

        @raises AutoservVerifyError if `val` is empty, or if
                `expected_val` is not `None` and differs from `val`.
        """
        if not val:
            # The file path fills the first placeholder and the attribute
            # name the second.
            raise hosts.AutoservVerifyError(
                    'config file %s exists, but %s '
                    'is not set' % (config_file, attr))
        if expected_val is not None and val != expected_val:
            raise hosts.AutoservVerifyError(
                    '%s is %s; it should be %s' % (attr, val, expected_val))

    def _get_config(self, host):
        """
        Return the config file to check.

        @param host     Host object.

        @return The config file to check.
        """
        return '%s_%d' % (self.CONFIG_FILE, host.servo_port)

    @property
    def description(self):
        return 'servo %s setting is correct' % self.ATTR


class _SerialConfigVerifier(_ConfigVerifier):
    """
    Verifier for the servo SERIAL configuration.
    """

    ATTR = 'SERIAL'

    def verify(self, host):
        """
        Test whether the `host` has a `SERIAL` setting configured.

        This tests the config file names used by the `servod` upstart
        job for a valid setting of the `SERIAL` variable.  The following
        conditions raise errors:
          * The SERIAL setting doesn't match the DUT's entry in the AFE
            database.
          * There is no config file.

        @param host  Servo host to be checked.
        """
        if not host.is_cros_host():
            return
        # Not all servo hosts will have a servo serial so don't verify if it's
        # not set.
        if host.servo_serial is None:
            return
        config = self._get_config(host)
        serialval = self._get_config_val(host, config, self.ATTR)
        if serialval is None:
            raise hosts.AutoservVerifyError(
                    'Servo serial is unconfigured; should be %s'
                    % host.servo_serial
            )

        self._validate_attr(host, serialval, host.servo_serial, self.ATTR,
                            config)


class _BoardConfigVerifier(_ConfigVerifier):
    """
    Verifier for the servo BOARD configuration.
    """

    ATTR = 'BOARD'

    def verify(self, host):
        """
        Test whether the `host` has a `BOARD` setting configured.

        This tests the config file names used by the `servod` upstart
        job for a valid setting of the `BOARD` variable.  The following
        conditions raise errors:
          * A config file exists, but the content contains no setting
            for BOARD.
          * The BOARD setting doesn't match the DUT's entry in the AFE
            database.
          * There is no config file.

        @param host  Servo host to be checked.
        """
        if not host.is_cros_host():
            return
        config = self._get_config(host)
        boardval = self._get_config_val(host, config, self.ATTR)
        if boardval is None:
            msg = 'Servo board is unconfigured'
            if host.servo_board is not None:
                msg += '; should be %s' % host.servo_board
            raise hosts.AutoservVerifyError(msg)

        self._validate_attr(host, boardval, host.servo_board, self.ATTR,
                            config)


class _ServodJobVerifier(hosts.Verifier):
    """
    Verifier to check that the `servod` upstart job is running.
    """

    def verify(self, host):
        """
        Check the `status servod` output for the host's servo port.

        Hosts that are not cros hosts are skipped.

        @param host  Servo host to be checked.

        @raises AutoservVerifyError if the status output does not
                contain 'start/running'.
        """
        if not host.is_cros_host():
            return
        status_cmd = 'status servod PORT=%d' % host.servo_port
        job_status = host.run(status_cmd, ignore_status=True).stdout
        if 'start/running' not in job_status:
            raise hosts.AutoservVerifyError(
                    'servod not running on %s port %d' %
                    (host.hostname, host.servo_port))

    @property
    def description(self):
        return 'servod upstart job is running'


class _DiskSpaceVerifier(hosts.Verifier):
    """
    Verifier to make sure there is enough disk space left on servohost.
    """

    def verify(self, host):
        """
        Check free space on the stateful partition of `host`.

        @param host  Servo host to be checked.
        """
        # Check available space of stateful is greater than threshold, in GiB.
        host.check_diskspace('/mnt/stateful_partition', 0.5)

    @property
    def description(self):
        return 'servohost has enough disk space.'


class _ServodConnectionVerifier(hosts.Verifier):
    """
    Verifier to check that we can connect to `servod`.

    This tests the connection to the target servod service with a simple
    method call.  As a side-effect, all servo signals are initialized to
    default values.

    N.B. Initializing servo signals is necessary because the power
    button and lid switch verifiers both test against expected initial
    values.
    """

    def verify(self, host):
        """
        Connect to servod on `host`.

        @param host  Servo host to connect to.
        """
        host.connect_servo()

    @property
    def description(self):
        return 'servod service is taking calls'


class _PowerButtonVerifier(hosts.Verifier):
    """
    Verifier to check sanity of the `pwr_button` signal.

    Tests that the `pwr_button` signal shows the power button has been
    released.  When `pwr_button` is stuck at `press`, it commonly
    indicates that the ribbon cable is disconnected.
    """
    # TODO (crbug.com/646593) - Remove list below once servo has been updated
    # with a dummy pwr_button signal.
    _BOARDS_WO_PWR_BUTTON = ['arkham', 'gale', 'mistral', 'storm', 'whirlwind']

    @ignore_exception_for_non_cros_host
    def verify(self, host):
        """
        Check that the `pwr_button` control reads 'release'.

        Boards listed in `_BOARDS_WO_PWR_BUTTON` are skipped.

        @param host  Servo host to be checked.

        @raises AutoservVerifyError if `pwr_button` is not 'release'.
        """
        if host.servo_board in self._BOARDS_WO_PWR_BUTTON:
            return
        button = host.get_servo().get('pwr_button')
        if button != 'release':
            raise hosts.AutoservVerifyError(
                    'Check ribbon cable: \'pwr_button\' is stuck')

    @property
    def description(self):
        return 'pwr_button control is normal'


class _LidVerifier(hosts.Verifier):
    """
    Verifier to check sanity of the `lid_open` signal.
    """

    @ignore_exception_for_non_cros_host
    def verify(self, host):
        """
        Check that the `lid_open` control reads 'yes' or 'not_applicable'.

        @param host  Servo host to be checked.

        @raises AutoservVerifyError for any other `lid_open` value.
        """
        lid_open = host.get_servo().get('lid_open')
        if lid_open != 'yes' and lid_open != 'not_applicable':
            raise hosts.AutoservVerifyError(
                    'Check lid switch: lid_open is %s' % lid_open)

    @property
    def description(self):
        return 'lid_open control is normal'


class _RestartServod(hosts.RepairAction):
    """Restart `servod` with the proper BOARD setting."""

    def repair(self, host):
        """
        Restart servod on `host`.

        @param host  Servo host on which to restart servod.

        @raises AutoservRepairError if `host` is not a cros host.
        """
        if not host.is_cros_host():
            raise hosts.AutoservRepairError(
                    'Can\'t restart servod: not running '
                    'embedded Chrome OS.',
                    'servo_not_applicable_to_non_cros_host')
        host.restart_servod()

    @property
    def description(self):
        return 'Start servod with the proper config settings.'
class _ServoRebootRepair(repair_utils.RebootRepair):
    """
    Reboot repair action that first waits for a pending update.

    Behaves like the standard `RebootRepair`, except that for a
    non-multi-DUTs servo host any pending update is allowed to finish
    before the reboot, so that a servo_v3 comes back up-to-date.
    Labstation reboot and update is handled by labstation host class;
    here a reboot is only requested for it.
    """

    def repair(self, host):
        """
        Update and reboot the servo host, or request a labstation reboot.

        @param host  Servo host to be repaired.

        @raises AutoservRepairError if `host` is localhost or is not a
                cros host.
        """
        outside_lab = host.is_localhost() or not host.is_cros_host()
        if outside_lab:
            raise hosts.AutoservRepairError(
                    'Target servo is not a test lab servo',
                    'servo_not_applicable_to_host_outside_lab')

        if host.is_labstation():
            host.request_reboot()
            logging.warning('Reboot labstation requested, it will be '
                            'handled by labstation administrative task.')
            return

        # A failed update must not block the DUT repair action, so it is
        # only logged.  See crbug.com/1029950.
        try:
            host.update_image(wait_for_update=True)
        except Exception as err:
            logging.error('Failed to update servohost image: %s', err)
        super(_ServoRebootRepair, self).repair(host)

    @property
    def description(self):
        return 'Wait for update, then reboot servo host.'


class _DutRebootRepair(hosts.RepairAction):
    """
    Reboot DUT to recover some servo controls depending on EC console.

    Certain servo controls, lid_open among them, need to talk to the DUT
    over the EC UART console.  When such controls fail, rebooting the DUT
    can bring them back.
    """

    def repair(self, host):
        """
        Reset the DUT through servo, then re-read `lid_open`.

        @param host  Servo host attached to the DUT.

        @raises AutoservVerifyError if `lid_open` is neither 'yes' nor
                'not_applicable' after the reset.
        """
        host.get_servo().get_power_state_controller().reset()
        # Reading lid_open exercises the EC console.
        lid_state = host.get_servo().get('lid_open')
        if lid_state in ('yes', 'not_applicable'):
            return
        raise hosts.AutoservVerifyError(
                'Still fail to contact EC console after rebooting DUT')

    @property
    def description(self):
        return 'Reset the DUT via servo'


class _DiskCleanupRepair(hosts.RepairAction):
    """
    Free disk space on servohost by removing old logs, metrics and
    crash dumps.
    """
    KEEP_LOGS_MAX_DAYS = 5

    FILE_TO_REMOVE = ['/var/lib/metrics/uma-events',
                      '/var/spool/crash/*']

    def repair(self, host):
        """
        Delete old servod logs and the paths in `FILE_TO_REMOVE`.

        Nothing is removed when `host` is localhost.  Command failures
        are ignored.

        @param host  Servo host to be cleaned up.
        """
        # Local testing: leave the workstation's files alone.
        if host.is_localhost():
            return

        # Old servod logs go first.
        find_cmd = ('/usr/bin/find /var/log/servod_* -mtime +%d -print -delete'
                    % self.KEEP_LOGS_MAX_DAYS)
        host.run(find_cmd, ignore_status=True)

        # Then the pre-defined metrics and crash dumps.
        for target in self.FILE_TO_REMOVE:
            host.run('rm %s' % target, ignore_status=True)

    @property
    def description(self):
        return 'Clean up old logs/metrics on servohost to free up disk space.'


def create_servo_repair_strategy():
    """
    Build the `RepairStrategy` used for a `ServoHost`.

    @return A `hosts.RepairStrategy` assembled from the verifier DAG and
            the repair actions listed below.
    """
    config_checks = ['brd_config', 'ser_config']
    servod_checks = ['job', 'servod', 'pwr_button']

    verifiers = [
        (repair_utils.SshVerifier, 'servo_ssh', []),
        (_DiskSpaceVerifier, 'disk_space', ['servo_ssh']),
        (_UpdateVerifier, 'update', ['servo_ssh']),
        (_BoardConfigVerifier, 'brd_config', ['servo_ssh']),
        (_SerialConfigVerifier, 'ser_config', ['servo_ssh']),
        (_ServodJobVerifier, 'job', config_checks + ['disk_space']),
        (_ServodConnectionVerifier, 'servod', ['job']),
        (_PowerButtonVerifier, 'pwr_button', ['servod']),
        (_LidVerifier, 'lid_open', ['servod']),
        # TODO(jrbarnette):  We want a verifier for whether there's
        # a working USB stick plugged into the servo.  However,
        # although we always want to log USB stick problems, we don't
        # want to fail the servo because we don't want a missing USB
        # stick to prevent, say, power cycling the DUT.
        #
        # So, it may be that the right fix is to put diagnosis into
        # ServoInstallRepair rather than add a verifier.
    ]

    repairs = [
        (_DiskCleanupRepair, 'disk_cleanup', ['servo_ssh'], ['disk_space']),
        (_RestartServod, 'restart', ['servo_ssh'],
         config_checks + servod_checks),
        (_ServoRebootRepair, 'servo_reboot', ['servo_ssh'], servod_checks),
        (_DutRebootRepair, 'dut_reboot', ['servod'], ['lid_open']),
    ]
    return hosts.RepairStrategy(verifiers, repairs, 'servo')