# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import re
import sys
import time

from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.server import afe_utils
from autotest_lib.server import test
from autotest_lib.server import utils
from autotest_lib.server.cros import provision
from autotest_lib.server.cros import provisioner

try:
    from chromite.lib import metrics
except ImportError:
    # Fall back to the mock supplied by utils when chromite is unavailable.
    metrics = utils.metrics_mock

_CONFIG = global_config.global_config
# pylint: disable-msg=E1120
_IMAGE_URL_PATTERN = _CONFIG.get_config_value('CROS',
                                              'image_url_pattern',
                                              type=str)


def _metric_name(base_name):
    """Return the full metric path for a provision metric.

    @param base_name: Trailing component of the metric name.

    @returns |base_name| prefixed with 'chromeos/autotest/provision/'.
    """
    return 'chromeos/autotest/provision/' + base_name


def _get_build_metrics_fields(build_name):
    """Extract the leading two fields of a parsed build name.

    The caller in this file unpacks the result as (board, build_type).

    @param build_name: Build name to hand to utils.ParseBuildName().

    @returns The first two elements of the parsed build name, or
             ('', '') when the name cannot be parsed.
    """
    try:
        return utils.ParseBuildName(build_name)[0:2]
    except utils.ParseBuildNameException:
        # Metrics are best effort; an unparseable name must not abort
        # provisioning.
        logging.warning(
                'Unable to parse build name %s for metrics. '
                'Continuing anyway.', build_name)
        return ('', '')


def _emit_updater_metrics(name_prefix, build_name, failure_reason, duration,
                          fields):
    """Emit the build, failure-reason and duration metrics for one prefix.

    @param name_prefix: Prefix prepended to each metric's base name.
    @param build_name: Value recorded in the '<prefix>_build_by_devserver_dut'
                       string metric.
    @param failure_reason: Value recorded in the
                           '<prefix>_failure_reason_by_devserver_dut' string
                           metric; nothing is recorded when this is falsy.
    @param duration: Value added to the
                     '<prefix>_duration_by_devserver_dut' distribution.
    @param fields: Dict of metric fields attached to every emitted metric.
    """
    # reset_after=True is required for String gauges events to ensure that
    # the metrics are not repeatedly emitted until the server restarts.
    metrics.String(_metric_name(name_prefix + '_build_by_devserver_dut'),
                   reset_after=True).set(build_name, fields=fields)
    if failure_reason:
        metrics.String(_metric_name(name_prefix +
                                    '_failure_reason_by_devserver_dut'),
                       reset_after=True).set(failure_reason, fields=fields)
    metrics.SecondsDistribution(
            _metric_name(name_prefix + '_duration_by_devserver_dut')).add(
                    duration, fields=fields)


def _emit_provision_metrics(update_url, dut_host_name, exception, duration):
    """Emit 'provision' and 'auto_update' metrics for one provision attempt.

    @param update_url: Image URL used for the update; the build name and
                       the devserver hostname are derived from it.
    @param dut_host_name: Hostname of the DUT, recorded as a metric field.
    @param exception: The exception raised by the update, or a falsy value
                      if the update succeeded.
    @param duration: Elapsed time of the attempt, as passed to
                     _emit_updater_metrics().
    """
    # The following is high cardinality, but sparse.
    # Each DUT is of a single board type, and likely build type.
    #
    # TODO(jrbarnette) The devserver-triggered provisioning code
    # includes retries in certain cases.  For that reason, the metrics
    # distinguish 'provision' metrics which summarizes across all
    # retries, and 'auto_update' which summarizes an individual update
    # attempt.  ChromiumOSProvisioner doesn't do retries, so we just report
    # the same information twice.  We should replace the metrics with
    # something better tailored to the current implementation.
    build_name = provisioner.url_to_image_name(update_url)
    board, build_type = _get_build_metrics_fields(build_name)
    fields = {
        'board': board,
        'build_type': build_type,
        'dut_host_name': dut_host_name,
        'dev_server': dev_server.get_resolved_hostname(update_url),
        'success': not exception,
    }
    failure_reason = provisioner.get_update_failure_reason(exception)
    _emit_updater_metrics('provision', build_name, failure_reason, duration,
                          fields)
    # Only the per-attempt ('auto_update') metrics carry an attempt number.
    fields['attempt'] = 1
    _emit_updater_metrics('auto_update', build_name, failure_reason, duration,
                          fields)


class provision_QuickProvision(test.test):
    """A test that can provision a machine to the correct ChromeOS version."""
    version = 1

    def initialize(self, host, value, is_test_na=False):
        """Initialize.

        @param host: The host object to update to |value|.
        @param value: The build type and version to install on the host.
        @param is_test_na: boolean, if True, will simply skip the test
                           and emit TestNAError. The control file
                           determines whether the test should be skipped
                           and passes the decision via this argument. Note
                           we can't raise TestNAError in control file as it
                           won't be caught and handled properly.

        @raises error.TestNAError: if |is_test_na| is true.
        @raises error.TestFail: if |value| is empty.
        """
        if is_test_na:
            raise error.TestNAError(
                    'Test not available for test_that. chroot detected, '
                    'you are probably using test_that.')
        # We check value in initialize so that it fails faster.
        if not value:
            raise error.TestFail('No build version specified.')

    def run_once(self, host, value):
        """The method called by the control file to start the test.

        @param host: The host object to update to |value|.
        @param value: The build to provision onto the host; a trailing
                      provision.CHEETS_SUFFIX is stripped to obtain the
                      image name and turns on the with_cheets install flag.

        @raises error.TestFail: if no devserver can be resolved for the
                                image.
        """
        with_cheets = False
        logging.debug('Start provisioning %s to %s.', host, value)
        if value.endswith(provision.CHEETS_SUFFIX):
            # Escape the suffix so it is matched literally rather than
            # being interpreted as a regular expression.
            suffix_pattern = re.escape(provision.CHEETS_SUFFIX) + '$'
            image = re.sub(suffix_pattern, '', value)
            with_cheets = True
        else:
            image = value

        # If the host is already on the correct build, we have nothing to do.
        # Note that this means we're not doing any sort of stateful-only
        # update, and that we're relying more on cleanup to do cleanup.
        info = host.host_info_store.get()
        if info.build == value:
            # We can't raise a TestNA, as would make sense, as that makes
            # job.run_test return False as if the job failed.  However, it'd
            # still be nice to get this into the status.log, so we manually
            # emit an INFO line instead.
            self.job.record('INFO', None, None,
                            'Host already running %s' % value)
            return

        try:
            ds = dev_server.ImageServer.resolve(image, host.hostname)
        except dev_server.DevServerException as e:
            # Re-raise as TestFail. The former three-expression raise
            # statement is a syntax error on Python 3, so the original
            # traceback is attached through with_traceback() when the
            # interpreter provides it.
            test_fail = error.TestFail(str(e))
            if hasattr(test_fail, 'with_traceback'):
                test_fail = test_fail.with_traceback(sys.exc_info()[2])
            raise test_fail

        url = _IMAGE_URL_PATTERN % (ds.url(), image)

        logging.debug('Installing image from URL: %s', url)
        start_time = time.time()
        failure = None
        try:
            afe_utils.machine_install_and_update_labels(host,
                                                        url,
                                                        with_cheets,
                                                        staging_server=ds)
        except BaseException as e:
            # Remember the failure so the metrics emitted in the finally
            # clause can report it, then let it propagate unchanged.
            failure = e
            raise
        finally:
            _emit_provision_metrics(url, host.hostname, failure,
                                    time.time() - start_time)
        logging.debug('Finished provisioning %s to %s', host, value)