1# Copyright 2017 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import logging 6import pprint 7import time 8import re 9 10from autotest_lib.client.common_lib import error 11from autotest_lib.server import autotest 12from autotest_lib.server.cros.faft.cr50_test import Cr50Test 13 14 15class firmware_Cr50DeviceState(Cr50Test): 16 """Verify Cr50 tracks the EC and AP state correctly. 17 18 Put the device through S0, S0ix, S3, and G3. Cr50 responds to these state 19 changes by enabling/disabling uart and changing its suspend type. Verify 20 that none of these cause any interrupt storms on Cr50. Make sure that there 21 aren't any interrupt storms and that Cr50 enters regular or deep sleep a 22 reasonable amount of times. 23 """ 24 version = 1 25 26 DEEP_SLEEP_STEP_SUFFIX = ' Num Deep Sleep Steps' 27 28 KEY_CMD_END_TIME = -4 29 # Use negative numbers to keep track of counts not in the IRQ list. 30 KEY_DEEP_SLEEP = -3 31 KEY_TIME = -2 32 KEY_RESET = -1 33 KEY_REGULAR_SLEEP = 112 34 INT_NAME = { 35 KEY_RESET : 'Reset Count', 36 KEY_DEEP_SLEEP : 'Deep Sleep Count', 37 KEY_TIME : 'Cr50 Time', 38 4 : 'HOST_CMD_DONE', 39 81 : 'GPIO0', 40 98 : 'GPIO1', 41 103 : 'I2CS WRITE', 42 KEY_REGULAR_SLEEP : 'PMU WAKEUP', 43 113 : 'AC present FED', 44 114 : 'AC present RED', 45 124 : 'RBOX_INTR_PWRB', 46 130 : 'SPS CS deassert', 47 138 : 'SPS RXFIFO LVL', 48 159 : 'SPS RXFIFO overflow', 49 160 : 'EVENT TIMER', 50 174 : 'CR50_RX_SERVO_TX', 51 177 : 'CR50_TX_SERVO_RX', 52 181 : 'AP_TX_CR50_RX', 53 184 : 'AP_RX_CR50_TX', 54 188 : 'EC_TX_CR50_RX', 55 191 : 'EC_RX_CR50_TX', 56 193 : 'USB', 57 } 58 IGNORED_KEYS = [KEY_CMD_END_TIME] 59 SLEEP_KEYS = [ KEY_REGULAR_SLEEP, KEY_DEEP_SLEEP ] 60 # USB, AP UART, and EC UART should be disabled if ccd is disabled. 61 CCD_KEYS = [ 181, 184, 188, 191, 193 ] 62 63 # Cr50 won't enable any form of sleep until it has been up for 20 seconds. 64 SLEEP_DELAY = 20 65 # The time in seconds to wait in each state. Wait one minute so it's long 66 # enough for cr50 to settle into whatever state. 60 seconds is also long 67 # enough that cr50 has enough time to enter deep sleep twice, so we can 68 # catch extra wakeups. 69 SLEEP_TIME = 60 70 SHORT_WAIT = 5 71 CONSERVATIVE_WAIT_TIME = SLEEP_TIME * 2 72 # Cr50 should wake up twice per second while in regular sleep 73 SLEEP_RATE = 2 74 75 DEEP_SLEEP_MAX = 2 76 ARM = 'ARM ' 77 # If there are over 100,000 interrupts, it is an interrupt storm. 78 DEFAULT_COUNTS = [0, 100000] 79 # A dictionary of ok count values for each irq that shouldn't follow the 80 # DEFAULT_COUNTS range. 81 EXPECTED_IRQ_COUNT_RANGE = { 82 KEY_RESET : [0, 0], 83 KEY_DEEP_SLEEP : [0, DEEP_SLEEP_MAX], 84 KEY_TIME : [0, CONSERVATIVE_WAIT_TIME], 85 'S0ix' + DEEP_SLEEP_STEP_SUFFIX : [0, 0], 86 # Cr50 may enter deep sleep an extra time, because of how the test 87 # collects taskinfo counts. Just verify that it does enter deep sleep 88 'S3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2], 89 'G3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2], 90 # ARM devices don't enter deep sleep in S3 91 ARM + 'S3' + DEEP_SLEEP_STEP_SUFFIX : [0, 0], 92 ARM + 'G3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2], 93 # Regular sleep is calculated based on the cr50 time 94 } 95 96 # Each line relevant taskinfo output should be 13 characters long with only 97 # digits or spaces. Use this information to make sure every taskinfo command 98 # gets the full relevant output. There are 4 characters for the irq number 99 # and 9 for the count. 100 GET_TASKINFO = ['IRQ counts by type:\s+(([\d ]{13}\r\n)+)Service calls'] 101 102 START = '' 103 INCREASE = '+' 104 DS_RESUME = 'DS' 105 106 MEM_SLEEP_PATH = '/sys/power/mem_sleep' 107 MEM_SLEEP_S0IX = 'echo %s > %s ; sleep 1' % ('s2idle', MEM_SLEEP_PATH) 108 MEM_SLEEP_S3 = 'echo %s > %s ; sleep 1' % ('deep', MEM_SLEEP_PATH) 109 POWER_STATE_PATH = '/sys/power/state' 110 POWER_STATE_S0IX = 'echo %s > %s' % ('freeze', POWER_STATE_PATH) 111 POWER_STATE_S3 = 'echo %s > %s' % ('mem', POWER_STATE_PATH) 112 113 114 def initialize(self, host, cmdline_args, full_args): 115 super(firmware_Cr50DeviceState, self).initialize(host, cmdline_args, 116 full_args) 117 # Don't bother if there is no Chrome EC or if EC hibernate doesn't work. 118 if not self.check_ec_capability(): 119 raise error.TestNAError("Nothing needs to be tested on this device") 120 121 self.generate_suspend_commands() 122 123 124 def generate_suspend_commands(self): 125 """Generate the S3 and S0ix suspend commands""" 126 s0ix_cmds = [] 127 s3_cmds = [] 128 if self.host.path_exists(self.MEM_SLEEP_PATH): 129 s0ix_cmds.append(self.MEM_SLEEP_S0IX) 130 s3_cmds.append(self.MEM_SLEEP_S3) 131 s0ix_cmds.append(self.POWER_STATE_S0IX) 132 s3_cmds.append(self.POWER_STATE_S3) 133 self._s0ix_cmds = '; '.join(s0ix_cmds) 134 self._s3_cmds = '; '.join(s3_cmds) 135 logging.info('S0ix cmd: %r', self._s0ix_cmds) 136 logging.info('S3 cmd: %r', self._s3_cmds) 137 138 139 def log_sleep_debug_information(self): 140 """Log some information used for debugging sleep issues""" 141 logging.debug( 142 self.cr50.send_command_retry_get_output('sleepmask', 143 ['sleepmask.*>'], 144 safe=True)[0]) 145 logging.debug( 146 self.cr50.send_command_retry_get_output('sysinfo', 147 ['sysinfo.*>'], 148 safe=True)[0]) 149 150 151 def get_taskinfo_output(self): 152 """Return a dict with the irq numbers as keys and counts as values""" 153 output = self.cr50.send_command_retry_get_output('taskinfo', 154 self.GET_TASKINFO, safe=True, retries=10)[0][1].strip() 155 logging.debug(output) 156 return output 157 158 159 def get_irq_counts(self): 160 """Return a dict with the irq numbers as keys and counts as values""" 161 irq_counts = { self.KEY_REGULAR_SLEEP : 0 } 162 # Running all of these commands may take a while. Track how much time 163 # commands are running, so we can offset the cr50 time to set sleep 164 # expectations 165 start_cmd_time = int(self.cr50.gettime()) 166 irq_counts[self.KEY_TIME] = start_cmd_time 167 168 output = self.get_taskinfo_output() 169 irq_list = re.findall('\d+\s+\d+\r', output) 170 # Make sure the regular sleep irq is in the dictionary, even if cr50 171 # hasn't seen any interrupts. It's important the test sees that there's 172 # never an interrupt. 173 for irq_info in irq_list: 174 logging.debug(irq_info) 175 num, count = irq_info.split() 176 irq_counts[int(num)] = int(count) 177 irq_counts[self.KEY_RESET] = int(self.cr50.get_reset_count()) 178 irq_counts[self.KEY_DEEP_SLEEP] = int(self.cr50.get_deep_sleep_count()) 179 # Log some information, so we can debug issues with sleep. 180 self.log_sleep_debug_information() 181 # Track when the commands end, so the test can ignore the time spent 182 # running these console commands. 183 end_cmd_time = int(self.cr50.gettime()) 184 irq_counts[self.KEY_CMD_END_TIME] = end_cmd_time 185 logging.info('Commands finished in %d seconds', 186 end_cmd_time - start_cmd_time) 187 return irq_counts 188 189 190 def get_expected_count(self, irq_key, cr50_time, idle): 191 """Get the expected irq increase for the given irq and state 192 193 Args: 194 irq_key: the irq int 195 cr50_time: the cr50 time in seconds 196 idle: Cr50 should enter regular sleep while the device is idle. 197 198 Returns: 199 A list with the expected irq count range [min, max] 200 """ 201 # CCD will prevent sleep 202 if self.ccd_enabled and (irq_key in self.SLEEP_KEYS or 203 self.DEEP_SLEEP_STEP_SUFFIX in str(irq_key)): 204 return [0, 0] 205 if irq_key == self.KEY_REGULAR_SLEEP: 206 # If cr50_time is really low, we probably woke cr50 up using 207 # taskinfo, which would be a pmu wakeup. 208 if cr50_time < 5: 209 return [0, 1] 210 # Only enforce the minimum regular sleep count if the device is 211 # idle. Cr50 may not enter regular sleep during power state 212 # transitions. 213 if idle: 214 min_count = max(cr50_time - self.SLEEP_DELAY, 0) 215 else: 216 min_count = 0 217 # Check that cr50 isn't continuously entering and exiting sleep. 218 # The PMU wakeups should happen around twice a second. Depending 219 # on TPM activity it may occur more often. Add 2 to the multiplier 220 # to allow for extra wakeups. This is mostly to catch issues that 221 # cause cr50 to wake up 100 times a second 222 max_count = cr50_time * (self.SLEEP_RATE + 2) 223 return [min_count, max_count] 224 # If ccd is disabled, ccd irq counts should not increase. 225 if not self.ccd_enabled and (irq_key in self.CCD_KEYS): 226 return [0, 0] 227 return self.EXPECTED_IRQ_COUNT_RANGE.get(irq_key, self.DEFAULT_COUNTS) 228 229 230 def check_increase(self, irq_key, name, increase, expected_range): 231 """Verify the irq count is within the expected range 232 233 Args: 234 irq_key: the irq int 235 name: the irq name string 236 increase: the irq count 237 expected_range: A list with the valid irq count range [min, max] 238 239 Returns: 240 '' if increase is in the given range. If the increase isn't in the 241 range, it returns an error message. 242 """ 243 min_count, max_count = expected_range 244 if min_count > increase or max_count < increase: 245 err_msg = '%s %s: %s not in range %s' % (name, irq_key, increase, 246 expected_range) 247 return err_msg 248 return '' 249 250 251 def get_step_events(self): 252 """Use the deep sleep counts to determine the step events""" 253 ds_counts = self.get_irq_step_counts(self.KEY_DEEP_SLEEP) 254 events = [] 255 for i, count in enumerate(ds_counts): 256 if not i: 257 events.append(self.START) 258 elif count != ds_counts[i - 1]: 259 # If the deep sleep count changed, Cr50 recovered deep sleep 260 # and the irq counts are reset. 261 events.append(self.DS_RESUME) 262 else: 263 events.append(self.INCREASE) 264 return events 265 266 267 def get_irq_step_counts(self, irq_key): 268 """Get a list of the all of the step counts for the given irq""" 269 return [ irq_dict.get(irq_key, 0) for irq_dict in self.steps ] 270 271 272 def check_for_errors(self, state): 273 """Check for unexpected IRQ counts at each step. 274 275 Find the irq count errors and add them to run_errors. 276 277 Args: 278 state: The power state: S0, S0ix, S3, or G3. 279 """ 280 num_steps = len(self.steps) 281 # Get all of the deep sleep counts 282 events = self.get_step_events() 283 284 irq_list = list(self.irqs) 285 irq_list.sort() 286 287 # Pad the start of the step names, so the names align with each step 288 # count. 289 irq_diff = ['%24s|%s|' % ('', '|'.join(self.step_names))] 290 step_errors = [ [] for i in range(num_steps) ] 291 292 end_cmd_times = self.get_irq_step_counts(self.KEY_CMD_END_TIME) 293 start_cmd_times = self.get_irq_step_counts(self.KEY_TIME) 294 cr50_time_diff = [] 295 for i, start_time in enumerate(start_cmd_times): 296 # The test collects a lot of information using console commands. 297 # These take a long time to run and cr50 isn't idle. The idle 298 # time is the time this step started minus the time the last set of 299 # commands finished running. 300 if events[i] == self.INCREASE: 301 cr50_time_diff.append(start_time - end_cmd_times[i - 1]) 302 else: 303 cr50_time_diff.append(start_time) 304 305 # Go through each irq and update its info in the progress dict 306 for irq_key in irq_list: 307 if irq_key in self.IGNORED_KEYS: 308 continue 309 name = self.INT_NAME.get(irq_key, 'Unknown') 310 # Print the IRQ name on the left of the column and the irq number 311 # on the right. 312 irq_progress_str = ['%-19s %3s' % (name, irq_key)] 313 314 irq_counts = self.get_irq_step_counts(irq_key) 315 for step, count in enumerate(irq_counts): 316 event = events[step] 317 318 # The deep sleep counts are not reset after deep sleep. Change 319 # the event to INCREASE. 320 if irq_key == self.KEY_DEEP_SLEEP and event == self.DS_RESUME: 321 event = self.INCREASE 322 323 if event == self.INCREASE: 324 count -= irq_counts[step - 1] 325 326 # Check that the count increase is within the expected value. 327 if event != self.START: 328 step_name = self.step_names[step].strip() 329 # TODO(b/153891388): raise actual error once the servo 330 # character loss issue is fixed. 331 if count < 0: 332 raise error.TestNAError('%s test found negative %s ' 333 'count %r (likely due to servo ' 334 'dropping characters)' % 335 (step, step_name, count)) 336 expected_range = self.get_expected_count(irq_key, 337 cr50_time_diff[step], idle='idle' in step_name) 338 339 rv = self.check_increase(irq_key, name, count, 340 expected_range) 341 if rv: 342 logging.info('Unexpected count in %s test: %s %s', 343 state, step_name, rv) 344 step_errors[step].append(rv) 345 346 irq_progress_str.append(' %2s %8d' % (event, count)) 347 348 # Separate each step count with '|'. Add '|' to the start and end of 349 # the line. 350 irq_diff.append('|%s|' % '|'.join(irq_progress_str)) 351 352 errors = {} 353 354 ds_key = self.ARM if self.is_arm else '' 355 ds_key += state + self.DEEP_SLEEP_STEP_SUFFIX 356 expected_range = self.get_expected_count(ds_key, 0, False) 357 rv = self.check_increase(None, ds_key, events.count(self.DS_RESUME), 358 expected_range) 359 if rv: 360 logging.info('Unexpected count for %s %s', state, rv) 361 errors[ds_key] = rv 362 for i, step_error in enumerate(step_errors): 363 if step_error: 364 logging.error('Step %d errors:\n%s', i, 365 pprint.pformat(step_error)) 366 step = '%s step %d %s' % (state, i, self.step_names[i].strip()) 367 errors[step] = step_error 368 369 logging.info('DIFF %s IRQ Counts:\n%s', state, '\n'.join(irq_diff)) 370 if errors: 371 logging.info('ERRORS %s IRQ Counts:\n%s', state, 372 pprint.pformat(errors)) 373 self.run_errors.update(errors) 374 375 376 def ap_is_on_after_power_button_press(self): 377 """Returns True if the AP is on after pressing the power button""" 378 # TODO (mruthven): use self.servo.power_short_press() once kukui power 379 # button issues are figured out. 380 self.servo.power_key(1) 381 # Give the AP some time to turn on 382 time.sleep(self.cr50.SHORT_WAIT) 383 return self.cr50.ap_is_on() 384 385 386 def trigger_s0(self): 387 """Press the power button so the DUT will wake up.""" 388 if self.ap_is_on_after_power_button_press(): 389 return 390 # Try a second time to see if the AP comes up. 391 if not self.ap_is_on_after_power_button_press(): 392 raise error.TestError('Could not wake the AP using power button') 393 logging.warning('Had to press power button twice to wake the AP') 394 395 396 def enter_state(self, state): 397 """Get the command to enter the power state""" 398 block = True 399 if state == 'S0': 400 self.trigger_s0() 401 else: 402 if state == 'S0ix': 403 full_command = self._s0ix_cmds 404 block = False 405 elif state == 'S3': 406 full_command = self._s3_cmds 407 block = False 408 elif state == 'G3': 409 full_command = 'poweroff' 410 self.faft_client.system.run_shell_command(full_command, block) 411 412 time.sleep(self.SHORT_WAIT); 413 # check state transition 414 if not self.wait_power_state(state, self.SHORT_WAIT): 415 raise error.TestFail('Platform failed to reach %s state.' % state) 416 417 418 def stage_irq_add(self, irq_dict, name=''): 419 """Add the current irq counts to the stored dictionary of irq info""" 420 self.steps.append(irq_dict) 421 self.step_names.append(name.center(12)) 422 self.irqs.update(irq_dict.keys()) 423 logging.info('%s:\n%s', name, pprint.pformat(irq_dict)) 424 425 426 def reset_irq_counts(self): 427 """Reset the test IRQ counts""" 428 self.steps = [] 429 self.step_names = [] 430 self.irqs = set() 431 432 433 def run_transition(self, state): 434 """Enter the given power state and reenter s0 435 436 Enter the power state and return to S0. Wait long enough to ensure cr50 437 will enter sleep mode, so we can verify that as well. 438 439 Args: 440 state: the power state: S0ix, S3, or G3 441 """ 442 self.reset_irq_counts() 443 444 # Enter the given state 445 self.enter_state(state) 446 self.stage_irq_add(self.get_irq_counts(), 'entered %s' % state) 447 448 logging.info('waiting %d seconds', self.SLEEP_TIME) 449 time.sleep(self.SLEEP_TIME) 450 # Nothing is really happening. Cr50 should basically be idle during 451 # SLEEP_TIME. 452 self.stage_irq_add(self.get_irq_counts(), 'idle in %s' % state) 453 454 # Return to S0 455 self.enter_state('S0') 456 self.stage_irq_add(self.get_irq_counts(), 'entered S0') 457 458 logging.info('waiting %d seconds', self.SLEEP_TIME) 459 time.sleep(self.SLEEP_TIME) 460 461 self.stage_irq_add(self.get_irq_counts(), 'idle in S0') 462 463 464 def verify_state(self, state): 465 """Verify cr50 behavior while running through the power state""" 466 467 try: 468 self.run_transition(state) 469 finally: 470 # reset the system to S0 no matter what happens 471 self.trigger_s0() 472 473 # Check that the progress of the irq counts seems reasonable 474 self.check_for_errors(state) 475 476 477 def is_arm_family(self): 478 """Returns True if the DUT is an ARM device.""" 479 arch = self.host.run('arch').stdout.strip() 480 return arch in ['aarch64', 'armv7l'] 481 482 483 def run_through_power_states(self): 484 """Go through S0ix, S3, and G3. Verify there are no interrupt storms""" 485 self._try_to_bring_dut_up() 486 self.run_errors = {} 487 self.ccd_str = 'ccd ' + ('enabled' if self.ccd_enabled else 'disabled') 488 logging.info('Running through states with %s', self.ccd_str) 489 490 self.cr50.get_ccdstate() 491 if not self.cr50.get_sleepmask() and self.ccd_enabled: 492 logging.info('Sleepmask is not keeping cr50 up with ccd enabled') 493 self.all_errors[self.ccd_str] = 'usb is not active with ccd enabled' 494 return 495 496 # Login before entering S0ix so cr50 will be able to enter regular sleep 497 client_at = autotest.Autotest(self.host) 498 client_at.run_test('login_LoginSuccess') 499 500 # Check if the device supports S0ix. The exit status will be 0 if it 501 # does 1 if it doesn't. 502 result = self.host.run('check_powerd_config --suspend_to_idle', 503 ignore_status=True) 504 if not result.exit_status: 505 self.verify_state('S0ix') 506 507 # Enter S3 508 self.verify_state('S3') 509 510 # Enter G3 511 self.verify_state('G3') 512 if self.run_errors: 513 self.all_errors[self.ccd_str] = self.run_errors 514 515 516 def run_once(self, host): 517 """Go through S0ix, S3, and G3. Verify there are no interrupt storms""" 518 self.all_errors = {} 519 self.host = host 520 self.is_arm = self.is_arm_family() 521 supports_dts_control = self.cr50.servo_dts_mode_is_valid() 522 523 if supports_dts_control: 524 self.cr50.ccd_disable(raise_error=True) 525 526 self.ccd_enabled = self.cr50.ccd_is_enabled() 527 self.run_through_power_states() 528 529 if supports_dts_control: 530 ccd_was_enabled = self.ccd_enabled 531 self.cr50.ccd_enable(raise_error=supports_dts_control) 532 self.ccd_enabled = self.cr50.ccd_is_enabled() 533 # If the first run had ccd disabled, and the test was able to enable 534 # ccd, run through the states again to make sure there are no issues 535 # come up when ccd is enabled. 536 if not ccd_was_enabled and self.ccd_enabled: 537 self.run_through_power_states() 538 else: 539 logging.info('Current setup only supports test with ccd %sabled.', 540 'en' if self.ccd_enabled else 'dis') 541 542 self.trigger_s0() 543 if self.all_errors: 544 raise error.TestFail('Unexpected Device State: %s' % 545 self.all_errors) 546 if not supports_dts_control: 547 raise error.TestNAError('Verified device state with %s. Please ' 548 'run with type c servo v4 to test full device state.' % 549 self.ccd_str) 550