• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import contextlib
6import datetime
7import logging
8import pprint
9import time
10
11import common
12from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib import utils as client_utils
14from autotest_lib.client.common_lib.cros.network import ap_constants
15from autotest_lib.client.common_lib.cros.network import iw_runner
16from autotest_lib.server import hosts
17from autotest_lib.server import site_linux_system
18from autotest_lib.server.cros import host_lock_manager
19from autotest_lib.server.cros.ap_configurators import ap_batch_locker
20from autotest_lib.server.cros.ap_configurators \
21        import ap_configurator_factory
22from autotest_lib.server.cros.network import chaos_clique_utils as utils
23from autotest_lib.server.cros.network import wifi_client
24from autotest_lib.server.hosts import adb_host
25
# Hostname of the webdriver master. ChaosRunner.run() opens an SSH host to it
# (as user 'chaosvmmaster') and passes that host to the VM helpers that check,
# power off and power on the webdriver VM.
MASTERNAME = 'chromeos3-chaosvmmaster.cros.corp.google.com'
# Port passed as both arguments of the SSH tunnel created to the webdriver VM;
# run() also checks it with `lsof -i` after creating the tunnel.
WEBDRIVER_PORT = 9515
29
30
class ChaosRunner(object):
    """Object to run a network_WiFi_ChaosXXX test.

    The runner is built around one DUT and one AP spec; run() then locks a
    packet capturer, a webdriver VM and batches of APs and invokes the test
    once per usable AP.
    """


    def __init__(self, test, host, spec, broken_pdus=None):
        """Initializes the runner and logs the server and DUT times.

        @param test: a string, test name.
        @param host: an Autotest host object, device under test.
        @param spec: an APSpec object.
        @param broken_pdus: list of offline PDUs, or None to start with a new
                            empty list. A caller-supplied list is stored as
                            is (not copied).

        """
        self._test = test
        self._host = host
        self._ap_spec = spec
        # Build the empty list here rather than in the signature: a list
        # default is created once at definition time and would be shared by
        # every runner constructed without an explicit argument.
        self._broken_pdus = [] if broken_pdus is None else broken_pdus
        # Log server and DUT times
        dt = datetime.datetime.now()
        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
        logging.info('DUT time: %s', self._host.run('date').stdout.strip())


    def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.

        @raises TestError: the packet capturer returned no scan result within
                ap_constants.MAX_SCAN_TIMEOUT, the scan saw
                ap_constants.MAX_SSID_COUNT or more networks, or nothing was
                found on WEBDRIVER_PORT after creating the SSH tunnel.
        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='chaosvmmaster')
        host_prefix = self._host.hostname.split('-')[0]
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname,
                    prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of 30 seconds.')
            capture_interface = 'wlan0'
            capturer_info = capture_host.run('cat /etc/lsb-release',
                                             ignore_status=True, timeout=5).stdout
            if 'whirlwind' in capturer_info:
                # Use the dual band aux radio for scanning networks.
                capture_interface = 'wlan2'
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan(capture_interface)
                if networks is None:
                    # The scan produced nothing; retry until the deadline.
                    continue
                if len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                        'Probably someone is already running a '
                        'chaos test?!')
                break
            else:
                # Reached only when the deadline passes without a usable
                # scan. Comparing elapsed time for exact equality with the
                # timeout (two floats) would practically never fire.
                raise error.TestError(
                    'Packet capturer is not responding to scans. Check '
                    'device and re-run test')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            # Lock VM. If on, power off; always power on. Then create a tunnel.
            webdriver_instance = utils.allocate_webdriver_instance(lock_manager)

            if utils.is_VM_running(webdriver_master, webdriver_instance):
                logging.info('VM %s was on; powering off for a clean instance',
                             webdriver_instance)
                utils.power_off_VM(webdriver_master, webdriver_instance)
                logging.info('Allow VM time to gracefully shut down')
                time.sleep(5)

            logging.info('Starting up VM %s', webdriver_instance)
            utils.power_on_VM(webdriver_master, webdriver_instance)
            logging.info('Allow VM time to power on before creating a tunnel.')
            time.sleep(30)

            # Stays None when no tunnel is needed so the cleanup code at the
            # end can test it without hitting an unbound name.
            webdriver_tunnel = None
            if not client_utils.host_is_in_lab_zone(webdriver_instance.hostname):
                self._ap_spec._webdriver_hostname = webdriver_instance.hostname
            else:
                # If in the lab then port forwarding must be done so webdriver
                # connection will be over localhost.
                self._ap_spec._webdriver_hostname = 'localhost'
                webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                                                WEBDRIVER_PORT, WEBDRIVER_PORT)
                logging.info('Wait for tunnel to be created.')
                # NOTE(review): this relies on the truthiness of the object
                # returned by client_utils.run(); confirm that a run with no
                # lsof output is actually falsy.
                for _ in range(3):
                    time.sleep(10)
                    results = client_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                             ignore_status=True)
                    if results:
                        break
                if not results:
                    raise error.TestError(
                            'Unable to listen to WEBDRIVER_PORT: %s' %
                            (results,))

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                # Do not reboot Android devices:b/27977927
                if self._host.get_os_type() != adb_host.OS_TYPE_ANDROID:
                    utils.sanitize_client(self._host)
                healthy_dut = True

                # A new host object for the DUT is created for every batch
                # and handed to the WiFiClient, which is closed when the
                # batch is done.
                dut_host = hosts.create_host(
                        {
                                'hostname' : self._host.hostname,
                                'afe_host' : self._host._afe_host,
                                'host_info_store':
                                        self._host.host_info_store,
                        },
                        host_class=self._host.__class__,
                )
                with contextlib.closing(wifi_client.WiFiClient(
                        dut_host, './debug', False)) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # Use the cartridge to group power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(aps,
                            batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid is None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                           capture_host, {},'packet_capturer')
                            continue
                        if not networks:
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(ap, batch_locker,
                                             self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                      'SSID': ap._ssid,
                                      'DUT': client.wifi_mac,
                                      'AP Info': ap.name,
                                      'kernel_version': kernel_ver,
                                      'wifi_firmware_version': firmware_ver}
                        debug_string = pprint.pformat(debug_dict)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        result = job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].ht,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=ap.ssid if conn_worker is None else
                                         '%s.%s' % (conn_worker.name, ap.ssid))

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        # The batch was already unlocked and reclaimed above;
                        # skip unlock_aps() and start the next batch.
                        continue

                batch_locker.unlock_aps()

            if webdriver_tunnel:
                webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel,
                                                         WEBDRIVER_PORT)
                webdriver_instance.close()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance.hostname)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)

            factory = ap_configurator_factory.APConfiguratorFactory(
                    ap_constants.AP_TEST_TYPE_CHAOS)
            factory.turn_off_all_routers(self._broken_pdus)
289