# Lint as: python2, python3
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import sys
import time

from autotest_lib.client.common_lib import error
from autotest_lib.server import autotest
from autotest_lib.server import hosts
from autotest_lib.server import test


class hardware_StorageStress(test.test):
    """
    Integrity stress test for storage device

    Server-side test that repeatedly runs a client-side FIO test on the DUT
    and executes a power command (reboot, suspend, wait or nothing) between
    the FIO runs, for a configurable total duration.
    """
    version = 1

    _HOURS_IN_SEC = 3600
    # Define default value for the test case
    _TEST_GAP = 60  # 1 min
    _TEST_DURATION = 12 * _HOURS_IN_SEC
    _SUSPEND_DURATION = _HOURS_IN_SEC
    _FIO_REQUIREMENT_FILE = '8k_async_randwrite'
    _FIO_WRITE_FLAGS = []
    _FIO_VERIFY_FLAGS = ['--verifyonly']
    # Name of the client test to run; overridden per instance in run_once()
    # when nonroot_dev is set.
    _FIO_TEST = 'hardware_StorageFio'

    def run_once(self, client_ip, gap=_TEST_GAP, duration=_TEST_DURATION,
                 power_command='reboot', storage_test_command='integrity',
                 suspend_duration=_SUSPEND_DURATION, storage_test_argument='',
                 cq=False, nonroot_dev=False):
        """
        Run the Storage stress test
        Use hardwareStorageFio to run some test_command repeatedly for a long
        time. Between each iteration of test command, run power command such as
        reboot or suspend.

        @param client_ip:     string of client's ip address (required)
        @param gap:           gap between each test (second) default = 1 min
        @param duration:      duration to run test (second) default = 12 hours
        @param power_command: command to do between each test Command
                              possible command: reboot / suspend / wait /
                              nothing. 'wait' sleeps for suspend_duration
                              seconds without issuing any command to the DUT.
        @param storage_test_command:  FIO command to run
                              - integrity:  Check data integrity
                              - full_write: Check performance consistency
                                            for full disk write. Use argument
                                            to determine which disk to write
        @param suspend_duration: if power_command is suspend, how long the DUT
                              is suspended. Also used as the sleep time when
                              power_command is wait.
        @param storage_test_argument: accepted for interface compatibility;
                              not read by this method.
        @param cq:            Indicates that this test is being run as part of
                              the cq. This is not used to test a component for
                              qualification, but to test the storage qual suite
        @param nonroot_dev:   if true, run 'hardware_StorageFioOther' instead
                              of 'hardware_StorageFio' as the client test.

        @raises error.TestError: if client_ip is empty.
        @raises error.TestFail: if power_command or storage_test_command is
                              not one of the supported values.
        """

        # in a cq run, do not execute the test, just output
        # the order that the test would have run in
        if cq:
            # Compare by value: identity ('is') on string literals depends on
            # interpreter interning and is not a reliable equality test.
            label = 'suspend' if power_command == 'suspend' else 'soak'
            self.write_test_keyval(
                {'storage_qual_cq': ('%f hardware_StorageStress_%s'
                                     % (time.time(), label))})
            return

        if nonroot_dev:
            self._FIO_TEST = 'hardware_StorageFioOther'

        # init test
        if not client_ip:
            raise error.TestError("Must provide client's IP address to test")

        self._client = hosts.create_host(client_ip)
        self._client_at = autotest.Autotest(self._client)
        self._results = {}
        self._suspend_duration = suspend_duration

        # parse power command
        if power_command == 'nothing':
            self._power_func = self._do_nothing
        elif power_command == 'reboot':
            self._power_func = self._do_reboot
        elif power_command == 'suspend':
            self._power_func = self._do_suspend
        elif power_command == 'wait':
            self._power_func = self._do_wait
        else:
            raise error.TestFail(
                'Test failed with error: Invalid power command')

        # Test is doing a lot of disk activity, monitor disk data at each
        # iteration.
        self.job.add_sysinfo_logfile('/var/log/storage_info.txt',
                                     on_every_test=True)

        # parse test command
        if storage_test_command == 'integrity':
            setup_func = self._write_data
            loop_func = self._verify_data
        elif storage_test_command == 'full_write':
            setup_func = self._do_nothing
            loop_func = self._full_disk_write
            # Do at least 2 soak runs. Given the absolute minimum of a loop is
            # around 1h, duration should be at least 1h.
            self._soak_time = min(self._TEST_DURATION, duration / 4)
        else:
            raise error.TestFail('Test failed with error: Invalid test command')

        # init statistic variable
        min_time_per_loop = sys.maxsize
        max_time_per_loop = 0
        all_loop_time = 0
        avr_time_per_loop = 0
        self._loop_count = 0
        setup_func()

        start_time = time.time()

        while time.time() - start_time < duration:
            # sleep
            time.sleep(gap)

            self._loop_count += 1

            # do power command & verify data & calculate time
            loop_start_time = time.time()
            loop_func()
            loop_time = time.time() - loop_start_time

            # update statistic
            all_loop_time += loop_time
            min_time_per_loop = min(loop_time, min_time_per_loop)
            max_time_per_loop = max(loop_time, max_time_per_loop)

        # Guard against division by zero when the loop body never ran.
        if self._loop_count > 0:
            avr_time_per_loop = all_loop_time / self._loop_count

        logging.info('check data count: %d', self._loop_count)

        # report result
        self.write_perf_keyval({'loop_count': self._loop_count})
        self.write_perf_keyval({'min_time_per_loop': min_time_per_loop})
        self.write_perf_keyval({'max_time_per_loop': max_time_per_loop})
        self.write_perf_keyval({'avr_time_per_loop': avr_time_per_loop})

    def _do_nothing(self):
        """
        No-op, used as the power command or as the setup step
        """
        pass

    def _do_wait(self):
        """
        Sleep for the configured suspend duration
        """
        time.sleep(self._suspend_duration)

    def _do_reboot(self):
        """
        Reboot host machine
        """
        self._client.reboot()

    def _do_suspend(self):
        """
        Suspend host machine
        """
        self._client.suspend(suspend_time=self._suspend_duration)

    def _write_data(self):
        """
        Write test data to host using hardware_StorageFio
        """
        logging.info('_write_data')
        self._client_at.run_test(self._FIO_TEST,
            check_client_result=True, disable_sysinfo=True, wait=0,
            tag='%s_%d' % ('write_data', self._loop_count),
            requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_WRITE_FLAGS)])

    def _verify_data(self):
        """
        Verify test data using hardware_StorageFio, then run the power command
        """
        logging.info('_verify_data #%d', self._loop_count)
        self._client_at.run_test(self._FIO_TEST,
            check_client_result=True, disable_sysinfo=True, wait=0,
            tag='%s_%d' % ('verify_data', self._loop_count),
            requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_VERIFY_FLAGS)])
        self._power_func()

    def _full_disk_write(self):
        """
        Do the root device full area write and report performance
        Write random pattern for few hours, then do a write and a verify,
        noting the latency.

        The power command is run after each of the three FIO steps, before
        the final wearout detection test.
        """
        logging.info('_full_disk_write #%d', self._loop_count)

        # use the default requirement that write different pattern around.
        self._client_at.run_test(self._FIO_TEST,
                                 check_client_result=True,
                                 disable_sysinfo=True,
                                 tag='%s_%d' % ('soak', self._loop_count),
                                 requirements=[('64k_stress', [])],
                                 time_length=self._soak_time)

        self._power_func()

        self._client_at.run_test(self._FIO_TEST,
                                 check_client_result=True,
                                 disable_sysinfo=True,
                                 tag='%s_%d' % ('surf', self._loop_count),
                                 requirements=[('surfing', [])],
                                 time_length=self._soak_time)

        self._power_func()

        self._client_at.run_test(self._FIO_TEST,
                                 check_client_result=True,
                                 disable_sysinfo=True,
                                 tag='%s_%d' % ('integrity', self._loop_count),
                                 wait=0, integrity=True)

        self._power_func()

        self._client_at.run_test('hardware_StorageWearoutDetect',
                                 tag='%s_%d' % ('wearout', self._loop_count),
                                 wait=0, use_cached_result=False)
        # No check_client_result for wearout (unlike the calls above), to test
        # devices past their limits.