# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import contextlib
import datetime
import logging
import pprint
import time

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import utils as client_utils
from autotest_lib.client.common_lib.cros.network import ap_constants
from autotest_lib.client.common_lib.cros.network import iw_runner
from autotest_lib.server import hosts
from autotest_lib.server import site_linux_system
from autotest_lib.server.cros import host_lock_manager
from autotest_lib.server.cros.ap_configurators import ap_batch_locker
from autotest_lib.server.cros.ap_configurators \
        import ap_configurator_factory
from autotest_lib.server.cros.network import chaos_clique_utils as utils
from autotest_lib.server.cros.network import wifi_client
from autotest_lib.server.hosts import adb_host

# Webdriver master hostname
MASTERNAME = 'chromeos3-chaosvmmaster.cros.corp.google.com'
WEBDRIVER_PORT = 9515


class ChaosRunner(object):
    """Object to run a network_WiFi_ChaosXXX test."""


    def __init__(self, test, host, spec, broken_pdus=None):
        """Initializes the runner and logs the server and DUT times.

        @param test: a string, test name.
        @param host: an Autotest host object, device under test.
        @param spec: an APSpec object.
        @param broken_pdus: list of offline PDUs, or None for a fresh empty
                            list. A list passed in is stored by reference
                            (not copied).

        """
        self._test = test
        self._host = host
        self._ap_spec = spec
        # A literal list as the default would be shared by every instance
        # constructed without this argument, so build a new one per instance.
        self._broken_pdus = [] if broken_pdus is None else broken_pdus
        # Log server and DUT times
        dt = datetime.datetime.now()
        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
        logging.info('DUT time: %s', self._host.run('date').stdout.strip())


    def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        Allocates a packet capturer and a webdriver VM, then locks APs in
        batches and runs self._test once against every usable AP in each
        batch. The VM is powered off and all routers are turned off at the
        end of a run that completes without raising.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.

        @raises TestError: the packet capturer does not answer scans within
                           ap_constants.MAX_SCAN_TIMEOUT, the initial scan
                           sees ap_constants.MAX_SSID_COUNT or more networks,
                           or the webdriver tunnel port check fails.

        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='chaosvmmaster')
        host_prefix = self._host.hostname.split('-')[0]
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname,
                    prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of 30 seconds.')
            capture_interface = 'wlan0'
            capturer_info = capture_host.run('cat /etc/lsb-release',
                                             ignore_status=True,
                                             timeout=5).stdout
            if 'whirlwind' in capturer_info:
                # Use the dual band aux radio for scanning networks.
                capture_interface = 'wlan2'
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan(capture_interface)
                if networks is None:
                    # The scan failed; retry until the time budget runs out.
                    continue
                if len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                            'Probably someone is already running a '
                            'chaos test?!')
                break
            else:
                # Reached only when the loop condition expires without a
                # successful scan. An exact float comparison against the
                # timeout can practically never be true, so the timeout is
                # detected here instead.
                raise error.TestError(
                        'Packet capturer is not responding to scans. Check '
                        'device and re-run test')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            # Lock VM. If on, power off; always power on. Then create a tunnel.
            webdriver_instance = utils.allocate_webdriver_instance(
                    lock_manager)

            if utils.is_VM_running(webdriver_master, webdriver_instance):
                logging.info('VM %s was on; powering off for a clean instance',
                             webdriver_instance)
                utils.power_off_VM(webdriver_master, webdriver_instance)
                logging.info('Allow VM time to gracefully shut down')
                time.sleep(5)

            logging.info('Starting up VM %s', webdriver_instance)
            utils.power_on_VM(webdriver_master, webdriver_instance)
            logging.info('Allow VM time to power on before creating a tunnel.')
            time.sleep(30)

            # Only the in-lab branch creates a tunnel; keep the name bound on
            # both paths because the teardown below always reads it.
            webdriver_tunnel = None
            if not client_utils.host_is_in_lab_zone(
                    webdriver_instance.hostname):
                self._ap_spec._webdriver_hostname = webdriver_instance.hostname
            else:
                # If in the lab then port forwarding must be done so webdriver
                # connection will be over localhost.
                self._ap_spec._webdriver_hostname = 'localhost'
                webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                        WEBDRIVER_PORT, WEBDRIVER_PORT)
                logging.info('Wait for tunnel to be created.')
                # NOTE(review): if client_utils.run() returns a result object
                # rather than the command output, it is presumably always
                # truthy and this check cannot fail -- confirm, and consider
                # testing the exit status or stdout instead.
                for _ in range(3):
                    time.sleep(10)
                    results = client_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                               ignore_status=True)
                    if results:
                        break
                if not results:
                    raise error.TestError(
                            'Unable to listen to WEBDRIVER_PORT: %s' %
                            (results,))

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                # Do not reboot Android devices:b/27977927
                if self._host.get_os_type() != adb_host.OS_TYPE_ANDROID:
                    utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(wifi_client.WiFiClient(
                        hosts.create_host(
                                {
                                        'hostname' : self._host.hostname,
                                        'afe_host' : self._host._afe_host,
                                        'host_info_store':
                                                self._host.host_info_store,
                                },
                                host_class=self._host.__class__,
                        ),
                        './debug',
                        False,
                )) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # Use the cartridge to group the power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(
                            aps, batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid is None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                    capture_host, {}, 'packet_capturer')
                            continue
                        if not networks:
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(ap, batch_locker,
                                             self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                      'SSID': ap._ssid,
                                      'DUT': client.wifi_mac,
                                      'AP Info': ap.name,
                                      'kernel_version': kernel_ver,
                                      'wifi_firmware_version': firmware_ver}
                        debug_string = pprint.pformat(debug_dict)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].ht,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=ap.ssid if conn_worker is None else
                                         '%s.%s' % (conn_worker.name, ap.ssid))

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        # The batch was already unlocked and reclaimed when
                        # the DUT was found unhealthy; skip unlock_aps() and
                        # start the next pass of the outer loop.
                        continue

                batch_locker.unlock_aps()

            if webdriver_tunnel:
                webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel,
                                                         WEBDRIVER_PORT)
                webdriver_instance.close()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance.hostname)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)

            factory = ap_configurator_factory.APConfiguratorFactory(
                    ap_constants.AP_TEST_TYPE_CHAOS)
            factory.turn_off_all_routers(self._broken_pdus)