• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import re
7
8from autotest_lib.client.common_lib import error
9from autotest_lib.server.cros import moblab_test
10from autotest_lib.server.hosts import moblab_host
11from autotest_lib.utils import labellib
12
13
# Minutes held back from the overall test timeout hint so that cleanup can
# still run after the suite finishes (see _append_run_suite_timeout).
_CLEANUP_TIME_M = 5
# Directory on the moblab whose contents are removed when the devserver
# cache is cleared (see _maybe_clear_devserver_cache).
_MOBLAB_IMAGE_STORAGE = '/mnt/moblab/static'
16
class moblab_StorageQual(moblab_test.MoblabTest):
    """
    Moblab storage qual suite test. Ensures that moblab can run the storage
    qual tests on the correct DUTs in the correct order. This test does not
    perform any destructive disk operations.

    The test requires 2 duts, labeled 'storage_qual_cq_1', 'storage_qual_cq_2'.
    Each DUT will run a sequence of tests, and the test will then verify
    that the correct tests ran on the correctly labeled DUT, in the correct
    order.
    """
    version = 1

    # Moblab expects to have 1 dut with each of these labels
    REQUIRED_LABELS = {'storage_qual_cq_1', 'storage_qual_cq_2'}

    # For each required label: the test names that the DUT carrying the label
    # is expected to run, listed in execution order.
    EXPECTED_RESULTS = {
        'storage_qual_cq_1': [
            'hardware_StorageQualBase_before',
            'hardware_StorageStress_soak',
            'hardware_StorageStress_soak',
            'hardware_StorageStress_suspend',
            'hardware_StorageQualBase_after'
        ],
        'storage_qual_cq_2': [
            'hardware_StorageQualBase_before',
            'hardware_StorageStress_soak',
            'hardware_StorageStress_soak',
            'hardware_StorageQualTrimStress',
            'hardware_StorageQualTrimStress',
            'hardware_StorageQualBase_after'
        ]
    }

    def run_once(self, host, moblab_suite_max_retries,
                 target_build='', clear_devserver_cache=True,
                 test_timeout_hint_m=None):
        """Runs a suite on a Moblab Host against its test DUTS.

        @param host: Moblab Host that will run the suite.
        @param moblab_suite_max_retries: The maximum number of test retries
                allowed within the suite launched on moblab.
        @param target_build: Optional build to be use in the run_suite
                call on moblab. This argument is passed as is to run_suite. It
                must be a sensible build target for the board of the sub-DUTs
                attached to the moblab.
        @param clear_devserver_cache: If True, image cache of the devserver
                running on moblab is cleared before running the test to validate
                devserver imaging staging flow.
        @param test_timeout_hint_m: (int) Optional overall timeout for the test.
                For this test, it is very important to collect post failure data
                from the moblab device. If the overall timeout is provided, the
                test will try to fail early to save some time for log collection
                from the DUT.

        @raises AutoservRunError if the suite does not complete successfully.
        @raises TestFail if no hosts are available, a required label is
                missing, the board cannot be determined, or the observed test
                execution order does not match EXPECTED_RESULTS.
        """
        self._host = host
        self._maybe_clear_devserver_cache(clear_devserver_cache)

        duts = host.afe.get_hosts()
        if not duts:
            raise error.TestFail('All hosts for this MobLab are down. Please '
                                 'request the lab admins to take a look.')

        board = None
        dut_to_label = {}
        for dut in duts:
            # Fetch the board of the DUT's assigned to this Moblab. There should
            # only be one type.
            board = labellib.LabelsMapping(dut.labels)['board']
            for label in dut.labels:
                if label in self.REQUIRED_LABELS:
                    dut_to_label[dut.hostname] = label

        # Compute the set difference before formatting: '%' binds tighter
        # than '-', so the difference must not be written inline after '%'.
        missing_labels = self.REQUIRED_LABELS - set(dut_to_label.values())
        if missing_labels:
            raise error.TestFail(
                'Missing required labels on hosts %s, are some hosts down?'
                % ', '.join(sorted(missing_labels)))

        if not board:
            raise error.TestFail('Could not determine board from hosts.')

        if not target_build:
            stable_version_map = host.afe.get_stable_version_map(
                    host.afe.CROS_IMAGE_TYPE)
            target_build = stable_version_map.get_image_name(board)

        logging.info('Running suite: hardware_storagequal_cq')
        # Arguments passed in via test_args may be str, so coerce the retry
        # count before formatting it with %d.
        cmd = ("%s/site_utils/run_suite.py --pool='' --board=%s --build=%s "
               "--suite_name=hardware_storagequal_cq --retry=True "
               "--max_retries=%d" %
               (moblab_host.AUTOTEST_INSTALL_DIR, board, target_build,
               int(moblab_suite_max_retries)))
        cmd, run_suite_timeout_s = self._append_run_suite_timeout(
                cmd,
                test_timeout_hint_m,
        )

        logging.debug('Run suite command: %s', cmd)
        try:
            result = host.run_as_moblab(cmd, timeout=run_suite_timeout_s)
        except error.AutoservRunError as e:
            if _is_run_suite_error_critical(e.result_obj.exit_status):
                raise
            # A non-critical exit status still carries the suite output that
            # the checks below need; without this, `result` would be unbound.
            result = e.result_obj

        logging.debug('Suite Run Output:\n%s', result.stderr)

        job_ids = self._get_job_ids_from_suite_output(result.stderr)

        logging.debug('Suite job ids %s', job_ids)

        keyvals_per_host = self._get_keyval_files_per_host(host, job_ids)

        logging.debug('Keyvals grouped by host %s', keyvals_per_host)

        failed_test = False
        verified_labels = set()
        for hostname in keyvals_per_host:
            label = dut_to_label.get(hostname)
            if label is None:
                # Results from a host without a required label cannot be
                # matched to an expectation; report instead of KeyError.
                logging.error(
                    'Found storage qual results for host %s, which has none '
                    'of the required labels', hostname)
                failed_test = True
                continue

            verified_labels.add(label)
            actual = self._get_test_execution_order(
                host, keyvals_per_host[hostname])
            if self._log_execution_order_mismatch(
                    label, hostname, self.EXPECTED_RESULTS[label], actual):
                failed_test = True

        # A required label with no results at all means its tests never ran,
        # which must not be reported as a pass.
        unverified_labels = self.REQUIRED_LABELS - verified_labels
        if unverified_labels:
            logging.error('No storage qual results found for labels: %s',
                          ', '.join(sorted(unverified_labels)))
            failed_test = True

        # Cache directory can contain large binaries like CTS/CTS zip files
        # no need to offload those in the results.
        # The cache is owned by root user
        host.run('rm -fR /mnt/moblab/results/shared/cache',
                    timeout=600)

        if failed_test:
            raise error.TestFail(
                'Actual test execution order did not match expected')

    def _log_execution_order_mismatch(self, label, hostname, expected,
                                      actual):
        """Log a side-by-side comparison of expected and actual test order.

        @param label (str) required label carried by the DUT
        @param hostname (str) hostname of the DUT the tests ran on
        @param expected (list<str>) test names in the expected order
        @param actual (list<str>) test names in the order they executed
        @return (bool) True if any position differs, False otherwise
        """
        logging.info('Comparing test order for %s from host %s',
            label, hostname)
        logging.info('%-37s %s', 'Expected', 'Actual')
        mismatch = False
        # Walk the longer of the two lists so that missing or extra tests
        # show up as a mismatch against None.
        for i in range(max(len(expected), len(actual))):
            expected_i = expected[i] if i < len(expected) else None
            actual_i = actual[i] if i < len(actual) else None
            check_fail = expected_i != actual_i
            check_text = 'X' if check_fail else ' '
            logging.info('%s %-35s %s', check_text, expected_i, actual_i)
            mismatch = mismatch or check_fail
        return mismatch

    def _append_run_suite_timeout(self, cmd, test_timeout_hint_m):
        """Modify given run_suite command with timeout.

        @param cmd: run_suite command str.
        @param test_timeout_hint_m: (int) timeout for the test, or None.
        @return cmd, run_suite_timeout_s: cmd is the updated command str,
                run_suite_timeout_s is the timeout to use for the run_suite
                call, in seconds.
        """
        if test_timeout_hint_m is None:
            # No hint given: leave the command untouched and use a fixed
            # 10800 second timeout for the run_suite call.
            return cmd, 10800

        # Arguments passed in via test_args may be all str, depending on how
        # they're passed in.
        test_timeout_hint_m = int(test_timeout_hint_m)
        elapsed_m = self.elapsed.total_seconds() / 60
        run_suite_timeout_m = (
                test_timeout_hint_m - elapsed_m - _CLEANUP_TIME_M)
        logging.info('Overall test timeout hint provided (%d minutes)',
                     test_timeout_hint_m)
        logging.info('%d minutes have already elasped', elapsed_m)
        logging.info(
                'Keeping %d minutes for cleanup, will allow %d minutes for '
                'the suite to run.', _CLEANUP_TIME_M, run_suite_timeout_m)
        cmd += ' --timeout_mins %d' % run_suite_timeout_m
        return cmd, run_suite_timeout_m * 60

    def _maybe_clear_devserver_cache(self, clear_devserver_cache):
        """Remove the contents of the moblab image storage if requested.

        @param clear_devserver_cache: bool, or a str that is treated as True
                only when it equals 'true' case-insensitively.
        """
        # When passed in via test_args, all arguments are str
        if not isinstance(clear_devserver_cache, bool):
            clear_devserver_cache = (clear_devserver_cache.lower() == 'true')
        if clear_devserver_cache:
            self._host.run('rm -rf %s/*' % _MOBLAB_IMAGE_STORAGE)

    def _get_job_ids_from_suite_output(self, suite_output):
        """Parse the set of job ids from run_suite output

        @param suite_output (str) output from run_suite command
        @return (set<int>) job ids contained in the suite
        """
        job_ids = set()
        job_id_pattern = re.compile(r'(\d+)-moblab')
        for line in suite_output.splitlines():
            match = job_id_pattern.search(line)
            logging.debug('suite line %s match %s', line, match)
            if match is None:
                continue
            job_ids.add(int(match.group(1)))
        return job_ids

    def _get_keyval_files_per_host(self, host, job_ids):
        """Find the result keyval files for the given job ids and
        group them by host

        @param host (moblab_host)
        @param job_ids (set<int>) set of job ids to find keyvals for
        @return (dict<str, list<str>>) map of hosts and the keyval
            file locations
        @throws AutoservRunError if the command fails to run on moblab
        """
        keyvals_per_host = {}
        keyvals = host.run_as_moblab(
            'find /mnt/moblab/results '
            '-wholename *-moblab/192.168*/hardware_Storage*/keyval')
        # Dots are escaped so that only a literal dotted 192.168.x.y address
        # is accepted as the host path component.
        pattern = re.compile(r'(\d+)-moblab/(192\.168\.\d+\.\d+)')
        for line in keyvals.stdout.splitlines():
            match = pattern.search(line)
            if match is None:
                continue
            job_id, dut = match.groups()
            # Ignore results left behind by jobs outside this suite run.
            if int(job_id) not in job_ids:
                continue
            keyvals_per_host.setdefault(dut, []).append(line)

        return keyvals_per_host

    def _get_test_execution_order(self, host, keyvals):
        """Determines the test execution order for the given list
        of storage qual test result keyvals

        @param host (moblab_host)
        @param keyvals (list<str>) location of keyval files to order
        @return (list<str>) test names in the order they executed
        @throws AutoservRunError if the command fails to run on moblab
        """
        # The remote pipeline greps the storage_qual_cq lines out of every
        # keyval file, sorts them, and keeps the second space-separated field.
        tests = host.run_as_moblab(
            'FILES=(%s); for FILE in ${FILES[@]}; do cat $FILE '
            '| grep storage_qual_cq; done '
            '| sort | cut -d " " -f 2'
            % ' '.join(keyvals)
        )
        pattern = re.compile(r'hardware_\w+')
        logging.debug(tests.stdout)
        test_execution_order = []
        for line in tests.stdout.splitlines():
            match = pattern.search(line)
            if match:
                test_execution_order.append(match.group(0))
        return test_execution_order
262
263def _is_run_suite_error_critical(return_code):
264    # We can't actually import run_suite here because importing run_suite pulls
265    # in certain MySQLdb dependencies that fail to load in the context of a
266    # test.
267    # OTOH, these return codes are unlikely to change because external users /
268    # builders depend on them.
269    return return_code not in (
270            0,  # run_suite.RETURN_CODES.OK
271            2,  # run_suite.RETURN_CODES.WARNING
272    )
273