# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import contextlib
import datetime
import logging
import pprint
import time

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import utils as client_utils
from autotest_lib.client.common_lib.cros.network import ap_constants
from autotest_lib.client.common_lib.cros.network import iw_runner
from autotest_lib.server import hosts
from autotest_lib.server import site_linux_system
from autotest_lib.server.cros import host_lock_manager
from autotest_lib.server.cros.ap_configurators import ap_batch_locker
from autotest_lib.server.cros.ap_configurators \
        import ap_configurator_factory
from autotest_lib.server.cros.network import chaos_clique_utils as utils
from autotest_lib.server.cros.network import wifi_client

# Webdriver master hostname
MASTERNAME = 'chromeos3-chaosvmmaster.cros.corp.google.com'
WEBDRIVER_PORT = 9515


class ChaosRunner(object):
    """Object to run a network_WiFi_ChaosXXX test."""


    def __init__(self, test, host, spec, broken_pdus=None):
        """Initializes the runner and logs the server and DUT times.

        @param test: a string, test name.
        @param host: an Autotest host object, device under test.
        @param spec: an APSpec object.
        @param broken_pdus: list of offline PDUs, or None for a fresh empty
                            list. A list passed in by the caller is stored
                            as-is (not copied).

        """
        self._test = test
        self._host = host
        self._ap_spec = spec
        # A mutable default argument would be shared by every ChaosRunner
        # created without an explicit list, so build a new list per instance.
        self._broken_pdus = [] if broken_pdus is None else broken_pdus
        # Log server and DUT times
        dt = datetime.datetime.now()
        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
        logging.info('DUT time: %s', self._host.run('date').stdout.strip())


    def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        Allocates a packet capturer and a webdriver VM, then repeatedly locks
        a batch of APs, configures them and runs the test once against each
        usable AP. The VM is powered off and all routers are turned off at
        the end of a run that completes without raising.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: Issues locking VM webdriver instance
        @raises TestError: the packet capturer never returned scan results
                           within ap_constants.MAX_SCAN_TIMEOUT, the initial
                           scan saw ap_constants.MAX_SSID_COUNT or more
                           networks, or the webdriver tunnel check failed.
        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='chaosvmmaster')
        host_prefix = self._host.hostname.split('-')[0]
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname,
                    prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of %s seconds.',
                         ap_constants.MAX_SCAN_TIMEOUT)
            capture_interface = 'wlan0'
            capturer_info = capture_host.run('cat /etc/lsb-release',
                                             ignore_status=True,
                                             timeout=5).stdout
            if 'whirlwind' in capturer_info:
                # Use the dual band aux radio for scanning networks.
                capture_interface = 'wlan2'
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan(capture_interface)
                if networks is None:
                    # Scan failed; retry until the timeout expires.
                    continue
                if len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                            'Probably someone is already running a '
                            'chaos test?!')
                break
            else:
                # The loop ran out of time without a single successful scan.
                # (An exact float comparison against the timeout can never be
                # relied on to detect this, hence the while/else.)
                raise error.TestError(
                        'Packet capturer is not responding to scans. Check '
                        'device and re-run test')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            # Lock VM. If on, power off; always power on. Then create a tunnel.
            webdriver_instance = utils.allocate_webdriver_instance(lock_manager)

            if utils.is_VM_running(webdriver_master, webdriver_instance):
                logging.info('VM %s was on; powering off for a clean instance',
                             webdriver_instance)
                utils.power_off_VM(webdriver_master, webdriver_instance)
                logging.info('Allow VM time to gracefully shut down')
                time.sleep(5)

            logging.info('Starting up VM %s', webdriver_instance)
            utils.power_on_VM(webdriver_master, webdriver_instance)
            logging.info('Allow VM time to power on before creating a tunnel.')
            time.sleep(30)

            # Only set when port forwarding is needed; must exist on both
            # branches because the teardown below tests it.
            webdriver_tunnel = None
            if not client_utils.host_is_in_lab_zone(
                    webdriver_instance.hostname):
                self._ap_spec._webdriver_hostname = webdriver_instance.hostname
            else:
                # If in the lab then port forwarding must be done so webdriver
                # connection will be over localhost.
                self._ap_spec._webdriver_hostname = 'localhost'
                webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                        WEBDRIVER_PORT, WEBDRIVER_PORT)
                logging.info('Wait for tunnel to be created.')
                # NOTE(review): this relies on the truthiness of the object
                # returned by client_utils.run(); confirm it is falsy when
                # lsof finds no listener, otherwise test its exit status.
                for _ in range(3):
                    time.sleep(10)
                    results = client_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                               ignore_status=True)
                    if results:
                        break
                if not results:
                    raise error.TestError(
                            'Unable to listen to WEBDRIVER_PORT: %s' %
                            (results,))

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(wifi_client.WiFiClient(
                        hosts.create_host(
                                {
                                        'hostname': self._host.hostname,
                                        'afe_host': self._host._afe_host,
                                        'host_info_store':
                                                self._host.host_info_store,
                                },
                                host_class=self._host.__class__,
                        ),
                        './debug',
                        False,
                )) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # Use the cartridge to group power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(
                            aps, batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid is None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                    capture_host, {}, 'packet_capturer')
                            continue
                        if not networks:
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(ap, batch_locker,
                                             self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                      'SSID': ap._ssid,
                                      'DUT': client.wifi_mac,
                                      'AP Info': ap.name,
                                      'kernel_version': kernel_ver,
                                      'wifi_firmware_version': firmware_ver}
                        debug_string = pprint.pformat(debug_dict)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        if conn_worker is None:
                            test_tag = ap.ssid
                        else:
                            test_tag = '%s.%s' % (conn_worker.name, ap.ssid)

                        job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].ht,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=test_tag)

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        # The batch was already unlocked and reclaimed when
                        # the DUT was found unhealthy; start the next batch.
                        continue

                batch_locker.unlock_aps()

            if webdriver_tunnel:
                webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel,
                                                         WEBDRIVER_PORT)
                webdriver_instance.close()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance.hostname)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)

            factory = ap_configurator_factory.APConfiguratorFactory(
                    ap_constants.AP_TEST_TYPE_CHAOS)
            factory.turn_off_all_routers(self._broken_pdus)