"""This class defines the Remote host class."""

import os
import logging
import urllib
import time
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes


class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time to wait for the host to go down in halt(), in seconds.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked as f(host) by get_labels(); each returns a label
    # string, a list of label strings, or a false value.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Where job_start() stores its snapshot of /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Record the basic attributes of this host.

        @param hostname The name used to identify and reach the host.
        @param autodir The autotest directory on the host, if known.
        @param args Positional arguments forwarded to the parent class.
        @param dargs Keyword arguments forwarded to the parent class.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Directories created by get_tmp_dir(); removed again in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Stop the loggers and remove every temporary directory that was
        handed out by get_tmp_dir() and not yet deleted.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(); presumably this guards
        # against close() being called on a partially set up object.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # Best-effort cleanup: a failed removal must not
                    # prevent the remaining directories from being tried.
                    pass


    def job_start(self):
        """
        Called the first time a remote host object is created for a
        specific host after a job starts.

        This implementation copies /var/log/messages (when it exists) to
        VAR_LOG_MESSAGES_COPY_PATH on the host. Any failure is logged and
        otherwise ignored.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.

        @param autodir The autotest directory on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Reboot the host by writing 'b' to /proc/sysrq-trigger in the
        background.
        """
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B.  This method makes no provision to bring the target back
        up.  The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout  Maximum time to wait for host down, in seconds.
        @param wait  Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                wait - Should we wait to see if the machine comes back up.
                       If this is set to True, errors raised while issuing
                       the reboot command are ignored and the outcome is
                       decided by wait_for_restart() instead.
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting promptly
                        is more important than data integrity and/or the
                        machine may have disks that cause sync to never return.
                reboot_cmd - Reboot command to execute.
                dargs - Extra keyword arguments, forwarded to reboot_setup()
                        and, when waiting, to wait_for_restart().
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            # Progressively more forceful attempts, each started only if
            # the host is still running after the preceding sleep.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "reboot.start")
            # Bind the name before the try block: if get_boot_id() itself
            # raises AutoservRunError and wait is set, the handler below
            # swallows the error and the name is still needed afterwards.
            # NOTE(review): assumes wait_for_restart() accepts
            # old_boot_id=None as "boot id unknown" -- confirm in the
            # parent class.
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to be avoided,
                # because we have no much control on remote machine after
                # "reboot" starts.
                if not wait:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()

    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        Args:
                timeout - How long the suspend is expected to last, in
                          seconds.
                suspend_cmd - suspend command to execute.
                dargs - Accepted for interface compatibility; not used here.

        Raises:
                AutoservSuspendError - if the suspend command fails, if the
                        host still does not answer a ping once polling is
                        over, or if the whole operation took less than
                        timeout seconds.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll every 5 seconds once the expected wake-up time
                    # is less than 10 seconds away.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        if lasted < timeout:
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))

    def reboot_followup(self, *args, **dargs):
        """
        Run the parent's post-reboot handling, then notify the job's
        profilers of the reboot when this host is attached to a job.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout  Forwarded to the parent's wait_for_restart().
        @param dargs  Extra keyword arguments forwarded to the parent.
        """
        def op_func():
            # pylint: disable=missing-docstring
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent's cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is recorded in self.tmp_dirs and is removed by
        close() unless delete_tmp_dir() removes it first.

        @param parent  Directory on the host under which the temporary
                       directory is created; it is created if missing.
        """
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Read this host's keyval file from the job's results directory.

        @returns The keyvals read from <resultdir>/host_keyvals/<hostname>,
                 or None when the host is not attached to a job.
        """
        if not self.job:
            return None
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        keyvals = self._read_host_keyvals()
        if keyvals is None:
            return None
        return keyvals.get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        The 'labels' keyval is a comma separated list of URL-quoted
        labels; each entry is unquoted before being returned.
        """
        keyvals = self._read_host_keyvals()
        if keyvals is None:
            return []
        all_labels = keyvals.get('labels', '')
        if not all_labels:
            return []
        return [urllib.unquote(label) for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # Tolerate directories that are not (or no longer) tracked, e.g.
        # when this is called twice for the same path.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Check that the host is up and read its uptime.

        @returns The first field of /proc/uptime, as a string.
        @raises AutoservHostError if is_up() reports the host as down.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. return True if exist.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes is running, or if no
        waitup processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # Fall back to plain "ps" when "ps -e" fails on the host.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function may return a single label string or a list of
        labels. False values and other types are ignored, and a label
        function that raises is logged and skipped.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if label:
                if isinstance(label, str):
                    labels.append(label)
                elif isinstance(label, list):
                    labels.extend(label)
        return labels