• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2018 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Functions for tracking & reporting a suite run."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
11import contextlib
12import logging
13import logging.config
14import mysql.connector
15
16from lucifer import autotest
17from skylab_suite import swarming_lib
18from skylab_suite import tko_test_views
19
20# Test status in _IGNORED_TEST_STATE won't be reported as test failure.
21# Or test may be reported as failure as
22# it's probably caused by the DUT is not well-provisioned.
23# TODO: Stop ignoring TASK_NO_RESOURCE if we drop TEST_NA feature.
24# Blocking issues:
25#     - Not all DUT labels are in skylab yet (crbug.com/871978)
26_IGNORED_TEST_STATE = [swarming_lib.TASK_NO_RESOURCE]
27
28
29@contextlib.contextmanager
30def _annotate_step(step_name):
31    try:
32        print('@@@SEED_STEP %s@@@' % step_name)
33        print('@@@STEP_CURSOR %s@@@' % step_name)
34        print('@@@STEP_STARTED@@@')
35        yield
36    finally:
37        print('@@@STEP_CLOSED@@@')
38
39
40def print_child_test_annotations(suite_handler):
41    """Print LogDog annotations for child tests."""
42    with _annotate_step('Scheduled Tests'):
43        for task_id, hspec in suite_handler.task_to_test_maps.iteritems():
44            anchor_test = hspec.test_spec.test.name
45            if suite_handler.is_provision():
46                anchor_test += '-' + hspec.test_spec.dut_name
47
48            show_text = '[Test-logs]: %s' % anchor_test
49            _print_task_result_link_annotation(task_id, show_text)
50
51
52def log_suite_results(suite_name, suite_handler):
53    """Log suite and its child tests' results & links.
54
55    @param suite_job: A cros_suite.Suite object.
56
57    @return the return code of suite decided by its child tests' results.
58    """
59    test_results = _parse_test_results(suite_handler)
60    suite_state, return_code = _get_suite_state(test_results, suite_handler)
61    if not test_results:
62        logging.info(('Suite %s timed out in waiting, test results '
63                      'are not parsed because they may still run.'), suite_name)
64        return return_code
65
66    logging.info('################# SUITE REPORTING #################')
67    logging.info('Suite Job %s %s', suite_name, suite_state)
68    _log_test_results(test_results)
69
70    logging.info('Links to tests:')
71    logging.info('Suite Job %s %s', suite_name,
72                 swarming_lib.get_task_link(suite_handler.suite_id))
73    _log_test_result_links(test_results)
74
75    _log_buildbot_links(suite_handler, suite_name, test_results)
76    return return_code
77
78
79def _get_failed_test_views_from_tko(task_ids):
80    """Get test views corresponding to failed tests from TKO.
81
82    @param task_ids: list of Swarming request IDs.
83    @return {task_id: [tko_test_views.Row()]}
84    """
85    conn = _new_tko_connection()
86    if conn is None:
87        return {}
88
89    try:
90        views = tko_test_views.get(conn, task_ids)
91    except mysql.connector.Error:
92        logging.exception('Failed to obtain failure reasons from TKO')
93        return {}
94    return {k: tko_test_views.filter_failed(v) for k, v in views.iteritems()}
95
96
97def _new_tko_connection():
98    global_config = autotest.load('client.common_lib.global_config')
99    config = global_config.global_config
100    try:
101        host = config.get_config_value('AUTOTEST_WEB', 'global_db_host')
102        user = config.get_config_value('AUTOTEST_WEB', 'global_db_user')
103        password = config.get_config_value('AUTOTEST_WEB', 'global_db_password')
104        database = config.get_config_value('AUTOTEST_WEB', 'database')
105    except global_config.ConfigError:
106        logging.exception('Could not load TKO connection configuration')
107        return None
108    try:
109        if host.startswith('/'):
110            return mysql.connector.connect(unix_socket=host, user=user,
111                                           password=password, database=database)
112        else:
113            return mysql.connector.connect(host=host, user=user,
114                                           password=password, database=database)
115    except mysql.connector.Error:
116        logging.exception('Failed to connect to TKO database')
117        return None
118
119
120def _print_task_result_link_annotation(task_id, text):
121    """Print the link of task logs.
122
123    Given text: 'dummy_Pass-chromeos4-row7-rack6-host19'
124          task_id: '3ee300e77a576e10'
125
126    The printed output will be:
127      [Test-logs]: dummy_Pass-chromeos4-row7-rack6-host19
128
129    Clicking it will direct you to
130      https://chrome-swarming.appspot.com/task?id=3ee300e77a576e10
131
132    @param anchor_test: a string to show on link.
133    @param task_id: a string task_id to form the swarming url.
134    """
135    annotations = autotest.chromite_load('buildbot_annotations')
136    print(annotations.StepLink('[Test-logs]: %s' % text,
137                               swarming_lib.get_stainless_logs_link(task_id)))
138
139
140def get_task_id_for_task_summaries(task_id):
141    """Adjust the swarming task id to end in 0 for showing task summaries.
142
143    Milo results are only generated for task summaries, that is, tasks whose
144    ids end in 0. This function adjusts the last digit of the task_id. See
145    https://goo.gl/LE4rwV for details.
146    """
147    return task_id[:-1] + '0'
148
149
150def log_create_task(suite_name, task_id):
151    """Print create task of suite."""
152    annotations = autotest.chromite_load('buildbot_annotations')
153    print(annotations.StepLink(
154            'Link to the suite create task: %s' % suite_name,
155            swarming_lib.get_task_link(
156                    get_task_id_for_task_summaries(task_id))))
157
158
159def log_wait_task(suite_name, task_id):
160    """Print create task of suite."""
161    annotations = autotest.chromite_load('buildbot_annotations')
162    print(annotations.StepLink(
163            'Link to the suite wait task: %s' % suite_name,
164            swarming_lib.get_task_link(
165                    get_task_id_for_task_summaries(task_id))))
166
167
168def _log_buildbot_links(suite_handler, suite_name, test_results):
169    logging.info('Links for buildbot:')
170    if suite_handler.suite_id is not None:
171        log_create_task(suite_name, suite_handler.suite_id)
172
173    if suite_handler.task_id is not None:
174        log_wait_task(suite_name, suite_handler.task_id)
175
176    if (suite_handler.is_provision() and
177        suite_handler.is_provision_successfully_finished()):
178        # There could be some child tasks may still run after provision suite
179        # finishes and claims that it succeeds. Skip logging them in buildbot.
180        return
181
182    failed_results = [t for t in test_results if _is_failed_result(t)]
183    if suite_handler.is_provision():
184        _log_buildbot_links_for_provision_tasks(failed_results)
185    else:
186        _log_buildbot_links_for_tasks(failed_results)
187
188
189def _log_buildbot_links_for_provision_tasks(test_results):
190    for result in test_results:
191        _print_task_result_link_annotation(result['task_ids'][0],
192                                           _get_show_test_name(result))
193
194
195def _log_buildbot_links_for_tasks(test_results):
196    task_ids = []
197    for result in test_results:
198        task_ids += result.get('task_ids', [])
199    failed_test_views = _get_failed_test_views_from_tko(task_ids)
200
201    for result in test_results:
202        task_id = result['task_ids'][0]
203        test_name = result['test_name']
204        if task_id in failed_test_views:
205            for v in failed_test_views[task_id]:
206                _print_task_result_link_annotation(task_id,
207                                                   _reason_from_test_view(v))
208        else:
209            _print_task_result_link_annotation(task_id, test_name)
210        _log_buildbot_links_for_test_history(task_id, test_name)
211
212
213def _log_buildbot_links_for_test_history(task_id, test_name):
214    annotations = autotest.chromite_load('buildbot_annotations')
215    reporting_utils = autotest.load('server.cros.dynamic_suite.reporting_utils')
216    print(annotations.StepLink(
217            '[Test-History]: %s' % test_name,
218            reporting_utils.link_test_history(test_name)))
219
220
221def _reason_from_test_view(test_view):
222    reason = '%s: %s' % (test_view.name, test_view.status)
223    if test_view.reason:
224        reason = '%s: %s' % (reason, test_view.reason)
225    return reason
226
227
228def _log_test_results(test_results):
229    """Log child results for a suite."""
230    logging.info('Start outputing test results:')
231    _log_test_results_with_logging(test_results)
232    _print_test_result_links_in_logdog(test_results)
233
234
235def _get_show_test_name(result):
236    """Get the test_name to show.
237
238    @param result: a test result dictionary, which is one item of the returned
239        list of _parse_test_results.
240    """
241    if result['dut_name']:
242        return result['test_name'] + '-' + result['dut_name']
243
244    return result['test_name']
245
246
247def _log_test_results_with_logging(test_results):
248    name_column_width = max(len(result['test_name']) + len(result['dut_name'])
249                            for result in test_results) + 3
250    for result in test_results:
251        padded_name = _get_show_test_name(result).ljust(name_column_width)
252        logging.info('%s%s', padded_name, result['state'])
253        if result['retry_count'] > 0:
254            logging.info('%s  retry_count: %s', padded_name,
255                         result['retry_count'])
256
257
258def _print_test_result_links_in_logdog(test_results):
259    with _annotate_step('Test Results'):
260        for result in test_results:
261            _print_single_test_result_link(result)
262
263
264def _print_single_test_result_link(result):
265    anchor_test = _get_show_test_name(result)
266    for idx, task_id in enumerate(result['task_ids']):
267        retry_suffix = ' (%dth retry)' % idx if idx > 0 else ''
268        anchor_test += retry_suffix
269        _print_task_result_link_annotation(
270                task_id,
271                '[%s]: %s' % (anchor_test, result['state']))
272
273
274def _parse_test_results(suite_handler):
275    """Parse test results after the suite job is finished.
276
277    @param suite_handler: A cros_suite.SuiteHandler object.
278
279    @return a list of test results.
280    """
281    test_results = []
282    for child_task in suite_handler.get_active_child_tasks(
283            suite_handler.suite_id):
284        task_id = child_task['task_id']
285        logging.info('Parsing task results of %s', task_id)
286        test_handler_spec = suite_handler.get_test_by_task_id(task_id)
287        name = test_handler_spec.test_spec.test.name
288        dut_name = test_handler_spec.test_spec.dut_name
289        retry_count = len(test_handler_spec.previous_retried_ids)
290        all_task_ids = test_handler_spec.previous_retried_ids + [task_id]
291        state = swarming_lib.get_task_final_state(child_task)
292        test_results.append({
293                'test_name': name,
294                'state': state,
295                'dut_name': dut_name,
296                'retry_count': retry_count,
297                'task_ids': all_task_ids})
298
299    return test_results
300
301
302def _get_final_suite_states():
303    run_suite_common = autotest.load('site_utils.run_suite_common')
304    return {
305            swarming_lib.TASK_COMPLETED_FAILURE:
306            (
307                    swarming_lib.TASK_COMPLETED_FAILURE,
308                    run_suite_common.RETURN_CODES.ERROR,
309            ),
310            # Task No_Resource means no available bots to accept the task.
311            # Deputy should check whether it's infra failure.
312            swarming_lib.TASK_NO_RESOURCE:
313            (
314                    swarming_lib.TASK_NO_RESOURCE,
315                    run_suite_common.RETURN_CODES.INFRA_FAILURE,
316            ),
317            # Task expired means a task is not triggered, could be caused by
318            #   1. No healthy DUTs/bots to run it.
319            #   2. Expiration seconds are too low.
320            #   3. Suite run is too slow to finish.
321            # Deputy should check whether it's infra failure.
322            swarming_lib.TASK_EXPIRED:
323            (
324                    swarming_lib.TASK_EXPIRED,
325                    run_suite_common.RETURN_CODES.INFRA_FAILURE,
326            ),
327            # Task canceled means a task is canceled intentionally. Deputy
328            # should check whether it's infra failure.
329            swarming_lib.TASK_CANCELED:
330            (
331                    swarming_lib.TASK_CANCELED,
332                    run_suite_common.RETURN_CODES.INFRA_FAILURE,
333            ),
334            swarming_lib.TASK_TIMEDOUT:
335            (
336                    swarming_lib.TASK_TIMEDOUT,
337                    run_suite_common.RETURN_CODES.SUITE_TIMEOUT,
338            ),
339            # Task pending means a task is still waiting for picking up, but
340            # the suite already hits deadline. So report it as suite TIMEOUT.
341            # It could also be an INFRA_FAILURE due to DUTs/bots shortage.
342            swarming_lib.TASK_PENDING:
343            (
344                    swarming_lib.TASK_TIMEDOUT,
345                    run_suite_common.RETURN_CODES.SUITE_TIMEOUT,
346            ),
347    }
348
349
350def _get_suite_state(child_test_results, suite_handler):
351    """Get a suite's final state and return code."""
352    run_suite_common = autotest.load('site_utils.run_suite_common')
353    if (suite_handler.is_provision() and
354        suite_handler.is_provision_successfully_finished()):
355        logging.info('Provisioned duts:')
356        for dut in list(suite_handler.successfully_provisioned_duts):
357            logging.info(dut)
358        return (swarming_lib.TASK_COMPLETED_SUCCESS,
359                run_suite_common.RETURN_CODES.OK)
360
361    _final_suite_states = _get_final_suite_states()
362    for result in child_test_results:
363        if ((result['state'] not in _IGNORED_TEST_STATE) and
364            result['state'] in _final_suite_states):
365            return _final_suite_states[result['state']]
366
367    return (swarming_lib.TASK_COMPLETED_SUCCESS,
368            run_suite_common.RETURN_CODES.OK)
369
370
371def _log_test_result_links(child_test_results):
372    """Output child results for a suite."""
373    for result in child_test_results:
374        for idx, task_id in enumerate(result['task_ids']):
375            retry_suffix = ' (%dth retry)' % idx if idx > 0 else ''
376            logging.info('%s  %s', result['test_name'] + retry_suffix,
377                         swarming_lib.get_stainless_logs_link(task_id))
378
379
380def setup_logging():
381    """Setup the logging for skylab suite."""
382    logging.config.dictConfig({
383        'version': 1,
384        'formatters': {
385            'default': {'format': '%(asctime)s %(levelname)-5s| %(message)s'},
386        },
387        'handlers': {
388            'screen': {
389                'class': 'logging.StreamHandler',
390                'formatter': 'default',
391            },
392        },
393        'root': {
394            'level': 'INFO',
395            'handlers': ['screen'],
396        },
397        'disable_existing_loggers': False,
398    })
399
400
401def _is_failed_result(result):
402    return result['state'] not in [
403            swarming_lib.TASK_COMPLETED_SUCCESS,
404            swarming_lib.TASK_RUNNING,
405    ]
406
407