# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import os
import re
import time

import common
from autotest_lib.client.common_lib import error, global_config
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.server.hosts import cros_host
from autotest_lib.server.hosts import cros_repair

from chromite.lib import timeout_util

AUTOTEST_INSTALL_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')

#'/usr/local/autotest'
SHADOW_CONFIG_PATH = '%s/shadow_config.ini' % AUTOTEST_INSTALL_DIR
ATEST_PATH = '%s/cli/atest' % AUTOTEST_INSTALL_DIR

# Sample output of fping that we are matching against. The fping command
# prints one summary line per probed address, in one of these two formats.
# We want to capture the IP address from lines of the first format and not
# match lines of the second format, which have a non-zero %loss.
#192.168.231.100 : xmt/rcv/%loss = 10/10/0%, min/avg/max = 0.68/0.88/1.13
#192.168.231.102 : xmt/rcv/%loss = 10/0/100%
# Both fragments are raw strings and the dots are escaped so that only the
# literal subnet prefix matches.
SUBNET_DUT_SEARCH_RE = (r'(?P<ip>192\.168\.231\.1[0-1][0-9]) : '
                        r'xmt/rcv/%loss = [0-9]+/[0-9]+/0%')

MOBLAB_HOME = '/home/moblab'
MOBLAB_BOTO_LOCATION = '%s/.boto' % MOBLAB_HOME
MOBLAB_LAUNCH_CONTROL_KEY_LOCATION = '%s/.launch_control_key' % MOBLAB_HOME
MOBLAB_SERVICE_ACCOUNT_LOCATION = '%s/.service_account.json' % MOBLAB_HOME
MOBLAB_AUTODIR = '/usr/local/autodir'
DHCPD_LEASE_FILE = '/var/lib/dhcp/dhcpd.leases'
MOBLAB_SERVICES = ['moblab-scheduler-init',
                   'moblab-database-init',
                   'moblab-devserver-init',
                   'moblab-gsoffloader-init',
                   'moblab-gsoffloader_s-init']
MOBLAB_PROCESSES = ['apache2', 'dhcpd']
DUT_VERIFY_SLEEP_SECS = 5
DUT_VERIFY_TIMEOUT = 15 * 60
MOBLAB_TMP_DIR = '/mnt/moblab/tmp'
MOBLAB_PORT = 80


class UpstartServiceNotRunning(error.AutoservError):
    """An expected upstart service was not in the expected state."""

    def __init__(self, service_name):
        """Create us.

        @param service_name: Name of the service that was in the wrong
                             state.
        """
        super(UpstartServiceNotRunning, self).__init__(
                'Upstart service %s not in running state. Most likely this '
                'means moblab did not boot correctly, check the boot logs '
                'for detailed error messages as to see why this service was '
                'not started.' %
                service_name)


class MoblabHost(cros_host.CrosHost):
    """Moblab specific host class."""


    def _initialize_frontend_rpcs(self, timeout_min):
        """Initialize frontends for AFE and TKO for a moblab host.

        We tunnel all communication to the frontends through an SSH tunnel as
        many testing environments block everything except SSH access to the
        moblab DUT.

        @param timeout_min: The timeout in minutes for AFE services.
        """
        web_address = self.rpc_server_tracker.tunnel_connect(MOBLAB_PORT)
        # Pass timeout_min to self.afe
        self.afe = frontend_wrappers.RetryingAFE(timeout_min=timeout_min,
                                                 user='moblab',
                                                 server=web_address)
        # Use default timeout_min of MoblabHost for self.tko
        self.tko = frontend_wrappers.RetryingTKO(timeout_min=self.timeout_min,
                                                 user='moblab',
                                                 server=web_address)


    def _initialize(self, *args, **dargs):
        """Initialize the host, its repair strategy and its RPC frontends.

        @param args: Positional arguments forwarded to the superclass.
        @param dargs: Keyword arguments forwarded to the superclass. The
                      optional 'rpc_timeout_min' entry (default 1) sets the
                      default RPC timeout, in minutes, for this host.
        """
        super(MoblabHost, self)._initialize(*args, **dargs)
        # TODO(jrbarnette): Our superclass already initialized
        # _repair_strategy, and now we're re-initializing it here.
        # That's awkward, if not actually wrong.
        self._repair_strategy = cros_repair.create_moblab_repair_strategy()
        self.timeout_min = dargs.get('rpc_timeout_min', 1)
        self._initialize_frontend_rpcs(self.timeout_min)


    @staticmethod
    def check_host(host, timeout=10):
        """
        Check if the given host is a moblab host.

        @param host: An ssh host representing a device.
        @param timeout: The timeout for the run command.


        @return: True if 'moblab' appears in the host's /etc/lsb-release,
                 False otherwise (including when the command could not be
                 run or the ssh connection timed out).
        """
        try:
            result = host.run(
                    'grep -q moblab /etc/lsb-release',
                    ignore_status=True, timeout=timeout)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def install_boto_file(self, boto_path=''):
        """Install a boto file on the Moblab device.

        @param boto_path: Path to the boto file to install. If empty, sends
                          the .boto file in the current user's home directory.

        @raises error.TestError if the boto file does not exist.
        """
        if not boto_path:
            # expanduser() copes with an unset HOME, where
            # os.getenv('HOME') would return None and break os.path.join.
            boto_path = os.path.join(os.path.expanduser('~'), '.boto')
        if not os.path.exists(boto_path):
            raise error.TestError('Boto File:%s does not exist.' % boto_path)
        self.send_file(boto_path, MOBLAB_BOTO_LOCATION)
        self.run('chown moblab:moblab %s' % MOBLAB_BOTO_LOCATION)


    def get_autodir(self):
        """Return the directory to install autotest for client side tests."""
        return self.autodir or MOBLAB_AUTODIR


    def run_as_moblab(self, command, **kwargs):
        """Moblab commands should be run as the moblab user, not root.

        @param command: Command to run as user moblab.
        @param kwargs: Extra keyword arguments forwarded to self.run().

        @return The result of self.run() for the wrapped command.
        """
        # NOTE(review): the command is wrapped in single quotes verbatim, so
        # a command that itself contains single quotes will break the
        # quoting. Callers must avoid or pre-escape them.
        command = "su - moblab -c '%s'" % command
        return self.run(command, **kwargs)


    def wait_afe_up(self, timeout_min=5):
        """Wait till the AFE is up and loaded.

        Attempt to reach the Moblab's AFE and database through its RPC
        interface.

        @param timeout_min: Minutes to wait for the AFE to respond. Default is
                            5 minutes.

        @raises error.AutoservError if the AFE does not respond within the
                timeout; other exceptions (e.g. urllib2.HTTPError) are passed
                through.
        """
        # Use moblabhost's own AFE object with a longer timeout to wait for the
        # AFE to load. Also re-create the ssh tunnel for connections to moblab.
        # Set the timeout_min to be longer than self.timeout_min for rebooting.
        self._initialize_frontend_rpcs(timeout_min)
        try:
            # Verify the AFE can handle a simple request.
            self._check_afe()
        finally:
            # Reset the timeout_min after the check, even when the check
            # failed, so later RPCs do not keep the longer timeout.
            self.afe.set_timeout(self.timeout_min)


    def add_dut(self, hostname):
        """Add a DUT hostname to the AFE.

        @param hostname: DUT hostname to add.
        """
        result = self.run_as_moblab('%s host create %s' % (ATEST_PATH,
                                                           hostname))
        logging.debug('atest host create output for host %s:\n%s',
                      hostname, result.stdout)


    def find_and_add_duts(self):
        """Discover DUTs on the testing subnet and add them to the AFE.

        Pings the range of IP's a DUT might be assigned by moblab, then
        parses the output to discover connected DUTs, connected means
        they have 0% dropped pings.
        If they are not already in the AFE, adds them to AFE.
        """
        existing_hosts = [host.hostname for host in self.afe.get_hosts()]
        fping_result = self.run('fping -g 192.168.231.100 192.168.231.110 '
                                '-a -c 10 -p 30 -q', ignore_status=True)
        # The per-host summary lines are parsed from stderr.
        for line in fping_result.stderr.splitlines():
            match = re.match(SUBNET_DUT_SEARCH_RE, line)
            if not match:
                continue
            dut_ip = match.group('ip')
            if dut_ip in existing_hosts:
                # Already registered; skip it but keep scanning the
                # remaining lines for DUTs that are not in the AFE yet.
                continue
            if self._check_dut_ssh(dut_ip):
                self.add_dut(dut_ip)
                existing_hosts.append(dut_ip)


    def _check_dut_ssh(self, dut_ip):
        """Check whether a DUT can be reached over ssh from the moblab.

        Tries up to 10 times to run a trivial command on the DUT as root.

        @param dut_ip: IP address of the DUT to probe.

        @return True if one of the attempts printed the expected output,
                False otherwise.
        """
        is_sshable = False
        count = 0
        while not is_sshable and count < 10:
            cmd = ('ssh -o ConnectTimeout=30 -o ConnectionAttempts=30'
                   ' root@%s echo Testing' % dut_ip)
            # ignore_status=True so that a failed attempt is retried instead
            # of raising out of the loop on the first non-zero exit status.
            result = self.run(cmd, ignore_status=True)
            is_sshable = 'Testing' in result.stdout
            logging.info(is_sshable)
            count += 1
        return is_sshable


    def verify_software(self):
        """Create the autodir then do standard verify."""
        # In case cleanup or powerwash wiped the autodir, create an empty
        # directory.
        # Removing this mkdir command will result in the disk size check
        # not being performed.
        self.run('mkdir -p %s' % MOBLAB_AUTODIR)
        super(MoblabHost, self).verify_software()


    def _verify_upstart_service(self, service, timeout_m):
        """Verify that the given moblab service is running.

        @param service: The upstart service to check for.
        @param timeout_m: Timeout (in minutes) before giving up.
        @raises TimeoutException or UpstartServiceNotRunning if service isn't
                running.
        """
        # Re-check every 10 seconds until the service is up or the timeout
        # expires.
        @retry.retry(error.AutoservError, timeout_min=timeout_m, delay_sec=10)
        def _verify():
            if not self.upstart_status(service):
                raise UpstartServiceNotRunning(service)
        _verify()


    def verify_moblab_services(self, timeout_m):
        """Verify the required Moblab services are up and running.

        @param timeout_m: Timeout (in minutes) for how long to wait for services
                to start. Actual time taken may be slightly more than this.
        @raises AutoservError if any moblab service is not running.
        """
        if not MOBLAB_SERVICES:
            return

        service = MOBLAB_SERVICES[0]
        try:
            # First service can take a long time to start, especially on first
            # boot where container setup can take 5-10 minutes, depending on the
            # device.
            self._verify_upstart_service(service, timeout_m)
        except error.TimeoutException:
            raise UpstartServiceNotRunning(service)

        for service in MOBLAB_SERVICES[1:]:
            try:
                # Follow up services should come up quickly.
                self._verify_upstart_service(service, 0.5)
            except error.TimeoutException:
                raise UpstartServiceNotRunning(service)

        for process in MOBLAB_PROCESSES:
            try:
                self.run('pgrep %s' % process)
            except error.AutoservRunError:
                raise error.AutoservError('Moblab process: %s is not running.'
                                          % process)


    def _check_afe(self):
        """Verify whether afe of moblab works before verifying its DUTs.

        Verifying moblab sometimes happens after a successful provision, in
        which case moblab is restarted but tunnel of afe is not re-connected.
        This func is used to check whether afe is working now.

        @return True if afe works.
        @raises error.AutoservError if AFE is down; other exceptions are passed
                through.
        """
        try:
            self.afe.get_hosts()
        except (error.TimeoutException, timeout_util.TimeoutError) as e:
            raise error.AutoservError('Moblab AFE is not responding: %s' %
                                      str(e))
        except Exception as e:
            logging.error('Unknown exception when checking moblab AFE: %s', e)
            raise

        return True


    def verify_duts(self):
        """Schedule reverification of the Moblab DUTs.

        Only asks the AFE to reverify its hosts and logs which hosts were
        scheduled; it does not wait for the result. Use
        verify_special_tasks_complete() to wait and check for Ready DUTs.
        """
        hosts = self.afe.reverify_hosts()
        logging.debug('DUTs scheduled for reverification: %s', hosts)


    def verify_special_tasks_complete(self):
        """Wait till the special tasks on the moblab host are complete.

        Polls the AFE every DUT_VERIFY_SLEEP_SECS seconds, for at most
        DUT_VERIFY_TIMEOUT seconds of accumulated sleep time.

        @raises error.AutoservError if no DUT is in the Ready state once the
                wait is over.
        """
        total_time = 0
        while (self.afe.get_special_tasks(is_complete=False) and
               total_time < DUT_VERIFY_TIMEOUT):
            total_time = total_time + DUT_VERIFY_SLEEP_SECS
            time.sleep(DUT_VERIFY_SLEEP_SECS)
        if not self.afe.get_hosts(status='Ready'):
            # Log every DUT's status to help diagnose why none are Ready.
            for host in self.afe.get_hosts():
                logging.error('DUT: %s Status: %s', host, host.status)
            raise error.AutoservError('Moblab has 0 Ready DUTs')


    def get_platform(self):
        """Determine the correct platform label for this host.

        For Moblab devices '_moblab' is appended.

        @returns a string representing this host's platform.
        """
        return super(MoblabHost, self).get_platform() + '_moblab'


    def make_tmp_dir(self, base=MOBLAB_TMP_DIR):
        """Creates a temporary directory.

        @param base: The directory where it should be created.

        @return Path to a newly created temporary directory.
        """
        self.run('mkdir -p %s' % base)
        return self.run('mktemp -d -p %s' % base).stdout.strip()


    def get_os_type(self):
        """Return the OS type string for this host: 'moblab'."""
        return 'moblab'