1# Lint as: python2, python3 2"""This class defines the Remote host class.""" 3 4from __future__ import absolute_import 5from __future__ import division 6from __future__ import print_function 7import os, logging, time 8import six 9from six.moves import urllib 10import re 11from autotest_lib.client.common_lib import error 12from autotest_lib.server import utils 13from autotest_lib.server.hosts import base_classes 14 15 16class RemoteHost(base_classes.Host): 17 """ 18 This class represents a remote machine on which you can run 19 programs. 20 21 It may be accessed through a network, a serial line, ... 22 It is not the machine autoserv is running on. 23 24 Implementation details: 25 This is an abstract class, leaf subclasses must implement the methods 26 listed here and in parent classes which have no implementation. They 27 may reimplement methods which already have an implementation. You 28 must not instantiate this class but should instantiate one of those 29 leaf subclasses. 30 """ 31 32 DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT 33 DEFAULT_HALT_TIMEOUT = 2 * 60 34 _LABEL_FUNCTIONS = [] 35 _DETECTABLE_LABELS = [] 36 37 VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start" 38 TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX' 39 40 41 def _initialize(self, hostname, autodir=None, *args, **dargs): 42 super(RemoteHost, self)._initialize(*args, **dargs) 43 44 self.hostname = hostname 45 self.autodir = autodir 46 self.tmp_dirs = [] 47 48 49 def __repr__(self): 50 return "<remote host: %s>" % self.hostname 51 52 53 def close(self): 54 # pylint: disable=missing-docstring 55 super(RemoteHost, self).close() 56 self.stop_loggers() 57 58 if hasattr(self, 'tmp_dirs'): 59 for dir in self.tmp_dirs: 60 try: 61 self.run('rm -rf "%s"' % (utils.sh_escape(dir))) 62 except error.AutoservRunError: 63 pass 64 65 66 def job_start(self): 67 """ 68 Abstract method, called the first time a remote host object 69 is created for a specific host after a job starts. 70 71 This method depends on the create_host factory being used to 72 construct your host object. If you directly construct host objects 73 you will need to call this method yourself (and enforce the 74 single-call rule). 75 """ 76 try: 77 cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages ' 78 '%s') % self.VAR_LOG_MESSAGES_COPY_PATH 79 self.run(cmd) 80 except Exception as e: 81 # Non-fatal error 82 logging.info('Failed to copy /var/log/messages at startup: %s', e) 83 84 85 def get_autodir(self): 86 return self.autodir 87 88 89 def set_autodir(self, autodir): 90 """ 91 This method is called to make the host object aware of the 92 where autotest is installed. Called in server/autotest.py 93 after a successful install 94 """ 95 self.autodir = autodir 96 97 98 def sysrq_reboot(self): 99 # pylint: disable=missing-docstring 100 self.run_background('echo b > /proc/sysrq-trigger') 101 102 103 def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True): 104 """ 105 Shut down the remote host. 106 107 N.B. This method makes no provision to bring the target back 108 up. The target will be offline indefinitely if there's no 109 independent hardware (servo, RPM, etc.) to force the target to 110 power on. 111 112 @param timeout Maximum time to wait for host down, in seconds. 113 @param wait Whether to wait for the host to go offline. 114 """ 115 self.run_background('sleep 1 ; halt') 116 if wait: 117 self.wait_down(timeout=timeout) 118 119 120 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True, 121 fastsync=False, reboot_cmd=None, **dargs): 122 """ 123 Reboot the remote host. 124 125 Args: 126 timeout - How long to wait for the reboot. 127 wait - Should we wait to see if the machine comes back up. 128 If this is set to True, ignores reboot_cmd's error 129 even if occurs. 130 fastsync - Don't wait for the sync to complete, just start one 131 and move on. This is for cases where rebooting prompty 132 is more important than data integrity and/or the 133 machine may have disks that cause sync to never return. 134 reboot_cmd - Reboot command to execute. 135 """ 136 self.reboot_setup(**dargs) 137 if not reboot_cmd: 138 reboot_cmd = ('sync & sleep 5; ' 139 'reboot & sleep 60; ' 140 'reboot -f & sleep 10; ' 141 'reboot -nf & sleep 10; ' 142 'telinit 6') 143 144 def reboot(): 145 # pylint: disable=missing-docstring 146 self.record("GOOD", None, "reboot.start") 147 current_boot_id = None 148 try: 149 current_boot_id = self.get_boot_id() 150 151 # sync before starting the reboot, so that a long sync during 152 # shutdown isn't timed out by wait_down's short timeout 153 if not fastsync: 154 self.run('sync; sync', timeout=timeout, ignore_status=True) 155 156 self.run_background(reboot_cmd) 157 except error.AutoservRunError: 158 # If wait is set, ignore the error here, and rely on the 159 # wait_for_restart() for stability, instead. 160 # reboot_cmd sometimes causes an error even if reboot is 161 # successfully in progress. This is difficult to be avoided, 162 # because we have no much control on remote machine after 163 # "reboot" starts. 164 if not wait or current_boot_id is None: 165 # TODO(b/37652392): Revisit no-wait case, later. 166 self.record("ABORT", None, "reboot.start", 167 "reboot command failed") 168 raise 169 if wait: 170 self.wait_for_restart(timeout, old_boot_id=current_boot_id, 171 **dargs) 172 173 # if this is a full reboot-and-wait, run the reboot inside a group 174 if wait: 175 self.log_op(self.OP_REBOOT, reboot) 176 else: 177 reboot() 178 179 def suspend(self, timeout, suspend_cmd, 180 allow_early_resume=False): 181 """ 182 Suspend the remote host. 183 184 Args: 185 timeout - How long to wait for the suspend in integer seconds. 186 suspend_cmd - suspend command to execute. 187 allow_early_resume - Boolean that indicate whether resume 188 before |timeout| is ok. 189 Raises: 190 error.AutoservSuspendError - If |allow_early_resume| is False 191 and if device resumes before 192 |timeout|. 193 """ 194 # define a function for the supend and run it in a group 195 def suspend(): 196 # pylint: disable=missing-docstring 197 self.record("GOOD", None, "suspend.start for %d seconds" % (timeout)) 198 try: 199 self.run_background(suspend_cmd) 200 except error.AutoservRunError: 201 self.record("ABORT", None, "suspend.start", 202 "suspend command failed") 203 raise error.AutoservSuspendError("suspend command failed") 204 205 # Wait for some time, to ensure the machine is going to sleep. 206 # Not too long to check if the machine really suspended. 207 time_slice = min(timeout / 2, 300) 208 time.sleep(time_slice) 209 time_counter = time_slice 210 while time_counter < timeout + 60: 211 # Check if the machine is back. We check regularely to 212 # ensure the machine was suspended long enough. 213 if utils.ping(self.hostname, tries=1, deadline=1) == 0: 214 return 215 else: 216 if time_counter > timeout - 10: 217 time_slice = 5 218 time.sleep(time_slice) 219 time_counter += time_slice 220 221 if utils.ping(self.hostname, tries=1, deadline=1) != 0: 222 raise error.AutoservSuspendError( 223 "DUT is not responding after %d seconds" % (time_counter)) 224 225 start_time = time.time() 226 self.log_op(self.OP_SUSPEND, suspend) 227 lasted = time.time() - start_time 228 logging.info("Device resumed after %d secs", lasted) 229 if (lasted < timeout and not allow_early_resume): 230 raise error.AutoservSuspendError( 231 "Suspend did not last long enough: %d instead of %d" % ( 232 lasted, timeout)) 233 234 def reboot_followup(self, *args, **dargs): 235 # pylint: disable=missing-docstring 236 super(RemoteHost, self).reboot_followup(*args, **dargs) 237 if self.job: 238 self.job.profilers.handle_reboot(self) 239 240 241 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs): 242 """ 243 Wait for the host to come back from a reboot. This wraps the 244 generic wait_for_restart implementation in a reboot group. 245 """ 246 def op_func(): 247 # pylint: disable=missing-docstring 248 super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs) 249 self.log_op(self.OP_REBOOT, op_func) 250 251 252 def cleanup(self): 253 # pylint: disable=missing-docstring 254 super(RemoteHost, self).cleanup() 255 self.reboot() 256 257 258 def get_tmp_dir(self, parent='/tmp'): 259 """ 260 Return the pathname of a directory on the host suitable 261 for temporary file storage. 262 263 The directory and its content will be deleted automatically 264 on the destruction of the Host object that was used to obtain 265 it. 266 """ 267 template = os.path.join(parent, self.TMP_DIR_TEMPLATE) 268 dir_name = self.run('mkdir -p %s && mktemp -d %s' % (parent, template)).stdout.rstrip() 269 self.tmp_dirs.append(dir_name) 270 return dir_name 271 272 273 def get_platform_label(self): 274 """ 275 Return the platform label, or None if platform label is not set. 276 """ 277 278 if self.job: 279 keyval_path = os.path.join(self.job.resultdir, 'host_keyvals', 280 self.hostname) 281 keyvals = utils.read_keyval(keyval_path) 282 return keyvals.get('platform', None) 283 else: 284 return None 285 286 287 def get_all_labels(self): 288 """ 289 Return all labels, or empty list if label is not set. 290 """ 291 if self.job: 292 keyval_path = os.path.join(self.job.resultdir, 'host_keyvals', 293 self.hostname) 294 keyvals = utils.read_keyval(keyval_path) 295 all_labels = keyvals.get('labels', '') 296 if all_labels: 297 all_labels = all_labels.split(',') 298 return [urllib.parse.unquote(label) for label in all_labels] 299 return [] 300 301 302 def delete_tmp_dir(self, tmpdir): 303 """ 304 Delete the given temporary directory on the remote machine. 305 306 @param tmpdir The directory to delete. 307 """ 308 self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True) 309 self.tmp_dirs.remove(tmpdir) 310 311 312 def delete_all_tmp_dirs(self, parent='/tmp'): 313 """ 314 Delete all directories in parent that were created by get_tmp_dir 315 316 Note that this may involve deleting directories created by calls to 317 get_tmp_dir on a different RemoteHost instance than the one running this 318 method. Only perform this operation when certain that this will not 319 cause unexpected behavior. 320 """ 321 # follow mktemp's behavior of only expanding 3 or more consecutive Xs 322 if isinstance(parent, (list, tuple)): 323 parents = parent 324 else: 325 parents = [parent] 326 rm_paths = [] 327 for parent in parents: 328 base_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE) 329 # distinguish between non-wildcard asterisks in parent directory name 330 # and wildcards inserted from the template 331 base = '*'.join( 332 ['"%s"' % utils.sh_escape(x) for x in base_template.split('*')]) 333 path = '"%s' % os.path.join(utils.sh_escape(parent), base[1:]) 334 rm_paths.append(path) 335 # remove deleted directories from tmp_dirs 336 regex = os.path.join(parent, re.sub('(XXXX*)', 337 lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)), 338 self.TMP_DIR_TEMPLATE)) 339 regex += '(/|$)' # remove if matches, or is within a dir that matches 340 self.tmp_dirs = [x for x in self.tmp_dirs if not re.match(regex, x)] 341 342 self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True) 343 344 def check_uptime(self): 345 """ 346 Check that uptime is available and monotonically increasing. 347 """ 348 if not self.is_up(): 349 raise error.AutoservHostError('Client does not appear to be up') 350 result = self.run("/bin/cat /proc/uptime", 30) 351 return result.stdout.strip().split()[0] 352 353 354 def check_for_lkdtm(self): 355 """ 356 Check for kernel dump test module. return True if exist. 357 """ 358 cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT' 359 return self.run(cmd, ignore_status=True).exit_status == 0 360 361 362 def are_wait_up_processes_up(self): 363 """ 364 Checks if any HOSTS waitup processes are running yet on the 365 remote host. 366 367 Returns True if any the waitup processes are running, False 368 otherwise. 369 """ 370 processes = self.get_wait_up_processes() 371 if len(processes) == 0: 372 return True # wait up processes aren't being used 373 for procname in processes: 374 exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname, 375 ignore_status=True).exit_status 376 if exit_status == 0: 377 return True 378 return False 379 380 381 def get_labels(self): 382 """Return a list of labels for this given host. 383 384 This is the main way to retrieve all the automatic labels for a host 385 as it will run through all the currently implemented label functions. 386 """ 387 labels = [] 388 for label_function in self._LABEL_FUNCTIONS: 389 try: 390 label = label_function(self) 391 except Exception: 392 logging.exception('Label function %s failed; ignoring it.', 393 label_function.__name__) 394 label = None 395 if label: 396 if type(label) is str: 397 labels.append(label) 398 elif type(label) is list: 399 labels.extend(label) 400 return labels 401 402 def get_result_dir(self): 403 """Return the result directory path if passed or None if not. 404 405 @return string 406 """ 407 if self.job and hasattr(self.job, 'resultdir'): 408 return self.job.resultdir 409 return None 410