"""This class defines the Remote host class."""

import os, logging, urllib, time
import re
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes


class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time, in seconds, that halt() waits for the host to go down.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked with the host as sole argument by get_labels().
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Remote path that job_start() copies /var/log/messages to.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    # mktemp template used by get_tmp_dir() and delete_all_tmp_dirs().
    TMP_DIR_TEMPLATE = 'autoserv-XXXXXX'


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Record the host name and autotest directory for this host.

        @param hostname Name of the remote host.
        @param autodir Autotest install directory on the host, if known.

        Remaining arguments are forwarded to the parent _initialize().
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote temporary directories handed out by get_tmp_dir(); they
        # are removed in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host, stop its loggers and remove the temporary
        directories that were created through get_tmp_dir().
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(), so it may be missing.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # Best-effort cleanup: a failed removal must not
                    # prevent the remaining directories from being removed.
                    pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Trigger a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B.  This method makes no provision to bring the target back
        up.  The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout  Maximum time to wait for host down, in seconds.
        @param wait  Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                wait - Should we wait to see if the machine comes back up.
                       If this is set to True, ignores reboot_cmd's error
                       even if occurs.
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting promptly
                        is more important than data integrity and/or the
                        machine may have disks that cause sync to never return.
                reboot_cmd - Reboot command to execute.
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            # Escalating sequence: each step is only reached if the
            # previous one has not taken the machine down yet.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "reboot.start")
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to avoid,
                # because we do not have much control over the remote
                # machine after "reboot" starts.
                if not wait or current_boot_id is None:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()

    def suspend(self, timeout, suspend_cmd,
                allow_early_resume=False):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend in integer seconds.
                suspend_cmd - suspend command to execute.
                allow_early_resume - Boolean that indicate whether resume
                                     before |timeout| is ok.
        Raises:
                error.AutoservSuspendError - If |allow_early_resume| is False
                                             and if device resumes before
                                             |timeout|.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once the expected resume time is near.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        logging.info("Device resumed after %d secs", lasted)
        if (lasted < timeout and not allow_early_resume):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))

    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot follow-up, then notify the job's profilers
        of the reboot when this host is attached to a job.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def op_func():
            # pylint: disable=missing-docstring
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        on the destruction of the Host object that was used to obtain
        it.

        @param parent Remote directory in which to create the temporary
                directory; it is created if missing.
        """
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """

        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            return keyvals.get('platform', None)
        else:
            return None


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.
        """
        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            all_labels = keyvals.get('labels', '')
            if all_labels:
                # Labels are stored comma-separated and URL-quoted.
                all_labels = all_labels.split(',')
                return [urllib.unquote(label) for label in all_labels]
        return []


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        The directory is also dropped from the list of tracked temporary
        directories; a directory that is not tracked is simply ignored.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # The removal above is best-effort, so the bookkeeping must not
        # raise ValueError for an untracked or already deleted directory.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def delete_all_tmp_dirs(self, parent='/tmp'):
        """
        Delete all directories in parent that were created by get_tmp_dir

        Note that this may involve deleting directories created by calls to
        get_tmp_dir on a different RemoteHost instance than the one running this
        method. Only perform this operation when certain that this will not
        cause unexpected behavior.

        @param parent Remote directory whose temporary directories are
                deleted.
        """
        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
        base_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE)
        # distinguish between non-wildcard asterisks in parent directory name
        # and wildcards inserted from the template
        base = '*'.join('"%s"' % utils.sh_escape(part)
                        for part in base_template.split('*'))
        path = '"%s' % os.path.join(utils.sh_escape(parent), base[1:])
        self.run('rm -rf %s' % path, ignore_status=True)
        # remove deleted directories from tmp_dirs
        name_regex = re.sub(
                '(XXXX*)',
                lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
                self.TMP_DIR_TEMPLATE)
        # parent is a literal path, so escape it before it becomes part of
        # the pattern; otherwise characters such as '.' or '+' would be
        # interpreted as regex syntax.
        regex = os.path.join(re.escape(parent), name_regex)
        regex += '(/|$)' # remove if matches, or is within a dir that matches
        pattern = re.compile(regex)
        # Build a real list (not a lazy filter object) so that later
        # append()/remove() calls on tmp_dirs keep working.
        self.tmp_dirs = [tmp_dir for tmp_dir in self.tmp_dirs
                         if not pattern.match(tmp_dir)]


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        Returns the first field of /proc/uptime as a string.

        Raises error.AutoservHostError if the host is not up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. return True if exist.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running, or if no
        waitup processes are configured; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # Fall back to plain 'ps' when 'ps -e' fails.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function may return a single string or a list of labels;
        a function that raises is logged and skipped.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if label:
                if isinstance(label, str):
                    labels.append(label)
                elif isinstance(label, list):
                    labels.extend(label)
        return labels