# Copyright 2018 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Functions for tracking & reporting a suite run.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import contextlib import logging import logging.config import mysql.connector from lucifer import autotest from skylab_suite import swarming_lib from skylab_suite import tko_test_views # Test status in _IGNORED_TEST_STATE won't be reported as test failure. # Or test may be reported as failure as # it's probably caused by the DUT is not well-provisioned. # TODO: Stop ignoring TASK_NO_RESOURCE if we drop TEST_NA feature. # Blocking issues: # - Not all DUT labels are in skylab yet (crbug.com/871978) _IGNORED_TEST_STATE = [swarming_lib.TASK_NO_RESOURCE] @contextlib.contextmanager def _annotate_step(step_name): try: print('@@@SEED_STEP %s@@@' % step_name) print('@@@STEP_CURSOR %s@@@' % step_name) print('@@@STEP_STARTED@@@') yield finally: print('@@@STEP_CLOSED@@@') def print_child_test_annotations(suite_handler): """Print LogDog annotations for child tests.""" with _annotate_step('Scheduled Tests'): for task_id, hspec in suite_handler.task_to_test_maps.iteritems(): anchor_test = hspec.test_spec.test.name if suite_handler.is_provision(): anchor_test += '-' + hspec.test_spec.dut_name show_text = '[Test-logs]: %s' % anchor_test _print_task_result_link_annotation(task_id, show_text) def log_suite_results(suite_name, suite_handler): """Log suite and its child tests' results & links. @param suite_job: A cros_suite.Suite object. @return the return code of suite decided by its child tests' results. """ test_results = _parse_test_results(suite_handler) suite_state, return_code = _get_suite_state(test_results, suite_handler) if not test_results: logging.info(('Suite %s timed out in waiting, test results ' 'are not parsed because they may still run.'), suite_name) return return_code logging.info('################# SUITE REPORTING #################') logging.info('Suite Job %s %s', suite_name, suite_state) _log_test_results(test_results) logging.info('Links to tests:') logging.info('Suite Job %s %s', suite_name, swarming_lib.get_task_link(suite_handler.suite_id)) _log_test_result_links(test_results) _log_buildbot_links(suite_handler, suite_name, test_results) return return_code def _get_failed_test_views_from_tko(task_ids): """Get test views corresponding to failed tests from TKO. @param task_ids: list of Swarming request IDs. @return {task_id: [tko_test_views.Row()]} """ conn = _new_tko_connection() if conn is None: return {} try: views = tko_test_views.get(conn, task_ids) except mysql.connector.Error: logging.exception('Failed to obtain failure reasons from TKO') return {} return {k: tko_test_views.filter_failed(v) for k, v in views.iteritems()} def _new_tko_connection(): global_config = autotest.load('client.common_lib.global_config') config = global_config.global_config try: host = config.get_config_value('AUTOTEST_WEB', 'global_db_host') user = config.get_config_value('AUTOTEST_WEB', 'global_db_user') password = config.get_config_value('AUTOTEST_WEB', 'global_db_password') database = config.get_config_value('AUTOTEST_WEB', 'database') except global_config.ConfigError: logging.exception('Could not load TKO connection configuration') return None try: if host.startswith('/'): return mysql.connector.connect(unix_socket=host, user=user, password=password, database=database) else: return mysql.connector.connect(host=host, user=user, password=password, database=database) except mysql.connector.Error: logging.exception('Failed to connect to TKO database') return None def _print_task_result_link_annotation(task_id, text): """Print the link of task logs. Given text: 'dummy_Pass-chromeos4-row7-rack6-host19' task_id: '3ee300e77a576e10' The printed output will be: [Test-logs]: dummy_Pass-chromeos4-row7-rack6-host19 Clicking it will direct you to https://chrome-swarming.appspot.com/task?id=3ee300e77a576e10 @param anchor_test: a string to show on link. @param task_id: a string task_id to form the swarming url. """ annotations = autotest.chromite_load('buildbot_annotations') print(annotations.StepLink('[Test-logs]: %s' % text, swarming_lib.get_stainless_logs_link(task_id))) def get_task_id_for_task_summaries(task_id): """Adjust the swarming task id to end in 0 for showing task summaries. Milo results are only generated for task summaries, that is, tasks whose ids end in 0. This function adjusts the last digit of the task_id. See https://goo.gl/LE4rwV for details. """ return task_id[:-1] + '0' def log_create_task(suite_name, task_id): """Print create task of suite.""" annotations = autotest.chromite_load('buildbot_annotations') print(annotations.StepLink( 'Link to the suite create task: %s' % suite_name, swarming_lib.get_task_link( get_task_id_for_task_summaries(task_id)))) def log_wait_task(suite_name, task_id): """Print create task of suite.""" annotations = autotest.chromite_load('buildbot_annotations') print(annotations.StepLink( 'Link to the suite wait task: %s' % suite_name, swarming_lib.get_task_link( get_task_id_for_task_summaries(task_id)))) def _log_buildbot_links(suite_handler, suite_name, test_results): logging.info('Links for buildbot:') if suite_handler.suite_id is not None: log_create_task(suite_name, suite_handler.suite_id) if suite_handler.task_id is not None: log_wait_task(suite_name, suite_handler.task_id) if (suite_handler.is_provision() and suite_handler.is_provision_successfully_finished()): # There could be some child tasks may still run after provision suite # finishes and claims that it succeeds. Skip logging them in buildbot. return failed_results = [t for t in test_results if _is_failed_result(t)] if suite_handler.is_provision(): _log_buildbot_links_for_provision_tasks(failed_results) else: _log_buildbot_links_for_tasks(failed_results) def _log_buildbot_links_for_provision_tasks(test_results): for result in test_results: _print_task_result_link_annotation(result['task_ids'][0], _get_show_test_name(result)) def _log_buildbot_links_for_tasks(test_results): task_ids = [] for result in test_results: task_ids += result.get('task_ids', []) failed_test_views = _get_failed_test_views_from_tko(task_ids) for result in test_results: task_id = result['task_ids'][0] test_name = result['test_name'] if task_id in failed_test_views: for v in failed_test_views[task_id]: _print_task_result_link_annotation(task_id, _reason_from_test_view(v)) else: _print_task_result_link_annotation(task_id, test_name) _log_buildbot_links_for_test_history(task_id, test_name) def _log_buildbot_links_for_test_history(task_id, test_name): annotations = autotest.chromite_load('buildbot_annotations') reporting_utils = autotest.load('server.cros.dynamic_suite.reporting_utils') print(annotations.StepLink( '[Test-History]: %s' % test_name, reporting_utils.link_test_history(test_name))) def _reason_from_test_view(test_view): reason = '%s: %s' % (test_view.name, test_view.status) if test_view.reason: reason = '%s: %s' % (reason, test_view.reason) return reason def _log_test_results(test_results): """Log child results for a suite.""" logging.info('Start outputing test results:') _log_test_results_with_logging(test_results) _print_test_result_links_in_logdog(test_results) def _get_show_test_name(result): """Get the test_name to show. @param result: a test result dictionary, which is one item of the returned list of _parse_test_results. """ if result['dut_name']: return result['test_name'] + '-' + result['dut_name'] return result['test_name'] def _log_test_results_with_logging(test_results): name_column_width = max(len(result['test_name']) + len(result['dut_name']) for result in test_results) + 3 for result in test_results: padded_name = _get_show_test_name(result).ljust(name_column_width) logging.info('%s%s', padded_name, result['state']) if result['retry_count'] > 0: logging.info('%s retry_count: %s', padded_name, result['retry_count']) def _print_test_result_links_in_logdog(test_results): with _annotate_step('Test Results'): for result in test_results: _print_single_test_result_link(result) def _print_single_test_result_link(result): anchor_test = _get_show_test_name(result) for idx, task_id in enumerate(result['task_ids']): retry_suffix = ' (%dth retry)' % idx if idx > 0 else '' anchor_test += retry_suffix _print_task_result_link_annotation( task_id, '[%s]: %s' % (anchor_test, result['state'])) def _parse_test_results(suite_handler): """Parse test results after the suite job is finished. @param suite_handler: A cros_suite.SuiteHandler object. @return a list of test results. """ test_results = [] for child_task in suite_handler.get_active_child_tasks( suite_handler.suite_id): task_id = child_task['task_id'] logging.info('Parsing task results of %s', task_id) test_handler_spec = suite_handler.get_test_by_task_id(task_id) name = test_handler_spec.test_spec.test.name dut_name = test_handler_spec.test_spec.dut_name retry_count = len(test_handler_spec.previous_retried_ids) all_task_ids = test_handler_spec.previous_retried_ids + [task_id] state = swarming_lib.get_task_final_state(child_task) test_results.append({ 'test_name': name, 'state': state, 'dut_name': dut_name, 'retry_count': retry_count, 'task_ids': all_task_ids}) return test_results def _get_final_suite_states(): run_suite_common = autotest.load('site_utils.run_suite_common') return { swarming_lib.TASK_COMPLETED_FAILURE: ( swarming_lib.TASK_COMPLETED_FAILURE, run_suite_common.RETURN_CODES.ERROR, ), # Task No_Resource means no available bots to accept the task. # Deputy should check whether it's infra failure. swarming_lib.TASK_NO_RESOURCE: ( swarming_lib.TASK_NO_RESOURCE, run_suite_common.RETURN_CODES.INFRA_FAILURE, ), # Task expired means a task is not triggered, could be caused by # 1. No healthy DUTs/bots to run it. # 2. Expiration seconds are too low. # 3. Suite run is too slow to finish. # Deputy should check whether it's infra failure. swarming_lib.TASK_EXPIRED: ( swarming_lib.TASK_EXPIRED, run_suite_common.RETURN_CODES.INFRA_FAILURE, ), # Task canceled means a task is canceled intentionally. Deputy # should check whether it's infra failure. swarming_lib.TASK_CANCELED: ( swarming_lib.TASK_CANCELED, run_suite_common.RETURN_CODES.INFRA_FAILURE, ), swarming_lib.TASK_TIMEDOUT: ( swarming_lib.TASK_TIMEDOUT, run_suite_common.RETURN_CODES.SUITE_TIMEOUT, ), # Task pending means a task is still waiting for picking up, but # the suite already hits deadline. So report it as suite TIMEOUT. # It could also be an INFRA_FAILURE due to DUTs/bots shortage. swarming_lib.TASK_PENDING: ( swarming_lib.TASK_TIMEDOUT, run_suite_common.RETURN_CODES.SUITE_TIMEOUT, ), } def _get_suite_state(child_test_results, suite_handler): """Get a suite's final state and return code.""" run_suite_common = autotest.load('site_utils.run_suite_common') if (suite_handler.is_provision() and suite_handler.is_provision_successfully_finished()): logging.info('Provisioned duts:') for dut in list(suite_handler.successfully_provisioned_duts): logging.info(dut) return (swarming_lib.TASK_COMPLETED_SUCCESS, run_suite_common.RETURN_CODES.OK) _final_suite_states = _get_final_suite_states() for result in child_test_results: if ((result['state'] not in _IGNORED_TEST_STATE) and result['state'] in _final_suite_states): return _final_suite_states[result['state']] return (swarming_lib.TASK_COMPLETED_SUCCESS, run_suite_common.RETURN_CODES.OK) def _log_test_result_links(child_test_results): """Output child results for a suite.""" for result in child_test_results: for idx, task_id in enumerate(result['task_ids']): retry_suffix = ' (%dth retry)' % idx if idx > 0 else '' logging.info('%s %s', result['test_name'] + retry_suffix, swarming_lib.get_stainless_logs_link(task_id)) def setup_logging(): """Setup the logging for skylab suite.""" logging.config.dictConfig({ 'version': 1, 'formatters': { 'default': {'format': '%(asctime)s %(levelname)-5s| %(message)s'}, }, 'handlers': { 'screen': { 'class': 'logging.StreamHandler', 'formatter': 'default', }, }, 'root': { 'level': 'INFO', 'handlers': ['screen'], }, 'disable_existing_loggers': False, }) def _is_failed_result(result): return result['state'] not in [ swarming_lib.TASK_COMPLETED_SUCCESS, swarming_lib.TASK_RUNNING, ]