• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""This class defines the Remote host class."""
2
3import os, logging, urllib, time
4from autotest_lib.client.common_lib import error
5from autotest_lib.server import utils
6from autotest_lib.server.hosts import base_classes
7
8
9class RemoteHost(base_classes.Host):
10    """
11    This class represents a remote machine on which you can run
12    programs.
13
14    It may be accessed through a network, a serial line, ...
15    It is not the machine autoserv is running on.
16
17    Implementation details:
18    This is an abstract class, leaf subclasses must implement the methods
19    listed here and in parent classes which have no implementation. They
20    may reimplement methods which already have an implementation. You
21    must not instantiate this class but should instantiate one of those
22    leaf subclasses.
23    """
24
    # Default timeout used by reboot() and wait_for_restart(); taken
    # unchanged from the generic Host class.
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time halt() waits for the host to go down, in seconds.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables run by get_labels(); each one is called with the host
    # object and may return a label string, a list of labels, or a false
    # value meaning "no label".
    _LABEL_FUNCTIONS = []
    # NOTE(review): not referenced in the visible code; presumably the
    # label names that automatic detection can produce -- confirm.
    _DETECTABLE_LABELS = []

    # Remote path that job_start() copies /var/log/messages to.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
31
32
33    def _initialize(self, hostname, autodir=None, *args, **dargs):
34        super(RemoteHost, self)._initialize(*args, **dargs)
35
36        self.hostname = hostname
37        self.autodir = autodir
38        self.tmp_dirs = []
39
40
41    def __repr__(self):
42        return "<remote host: %s>" % self.hostname
43
44
45    def close(self):
46        super(RemoteHost, self).close()
47        self.stop_loggers()
48
49        if hasattr(self, 'tmp_dirs'):
50            for dir in self.tmp_dirs:
51                try:
52                    self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
53                except error.AutoservRunError:
54                    pass
55
56
57    def job_start(self):
58        """
59        Abstract method, called the first time a remote host object
60        is created for a specific host after a job starts.
61
62        This method depends on the create_host factory being used to
63        construct your host object. If you directly construct host objects
64        you will need to call this method yourself (and enforce the
65        single-call rule).
66        """
67        try:
68            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
69                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
70            self.run(cmd)
71        except Exception, e:
72            # Non-fatal error
73            logging.info('Failed to copy /var/log/messages at startup: %s', e)
74
75
76    def get_autodir(self):
77        return self.autodir
78
79
80    def set_autodir(self, autodir):
81        """
82        This method is called to make the host object aware of the
83        where autotest is installed. Called in server/autotest.py
84        after a successful install
85        """
86        self.autodir = autodir
87
88
89    def sysrq_reboot(self):
90        self.run_background('echo b > /proc/sysrq-trigger')
91
92
93    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
94        """
95        Shut down the remote host.
96
97        N.B.  This method makes no provision to bring the target back
98        up.  The target will be offline indefinitely if there's no
99        independent hardware (servo, RPM, etc.) to force the target to
100        power on.
101
102        @param timeout  Maximum time to wait for host down, in seconds.
103        @param wait  Whether to wait for the host to go offline.
104        """
105        self.run_background('sleep 1 ; halt')
106        if wait:
107            self.wait_down(timeout=timeout)
108
109
110    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
111               fastsync=False, reboot_cmd=None, **dargs):
112        """
113        Reboot the remote host.
114
115        Args:
116                timeout - How long to wait for the reboot.
117                wait - Should we wait to see if the machine comes back up.
118                       If this is set to True, ignores reboot_cmd's error
119                       even if occurs.
120                fastsync - Don't wait for the sync to complete, just start one
121                        and move on. This is for cases where rebooting prompty
122                        is more important than data integrity and/or the
123                        machine may have disks that cause sync to never return.
124                reboot_cmd - Reboot command to execute.
125        """
126        self.reboot_setup(**dargs)
127        if not reboot_cmd:
128            reboot_cmd = ('sync & sleep 5; '
129                          'reboot & sleep 60; '
130                          'reboot -f & sleep 10; '
131                          'reboot -nf & sleep 10; '
132                          'telinit 6')
133
134        def reboot():
135            # pylint: disable=missing-docstring
136            self.record("GOOD", None, "reboot.start")
137            try:
138                current_boot_id = self.get_boot_id()
139
140                # sync before starting the reboot, so that a long sync during
141                # shutdown isn't timed out by wait_down's short timeout
142                if not fastsync:
143                    self.run('sync; sync', timeout=timeout, ignore_status=True)
144
145                self.run_background(reboot_cmd)
146            except error.AutoservRunError:
147                # If wait is set, ignore the error here, and rely on the
148                # wait_for_restart() for stability, instead.
149                # reboot_cmd sometimes causes an error even if reboot is
150                # successfully in progress. This is difficult to be avoided,
151                # because we have no much control on remote machine after
152                # "reboot" starts.
153                if not wait:
154                    # TODO(b/37652392): Revisit no-wait case, later.
155                    self.record("ABORT", None, "reboot.start",
156                                "reboot command failed")
157                    raise
158            if wait:
159                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
160                                      **dargs)
161
162        # if this is a full reboot-and-wait, run the reboot inside a group
163        if wait:
164            self.log_op(self.OP_REBOOT, reboot)
165        else:
166            reboot()
167
    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host and check that it stays down long enough.

        The suspend command is started in the background, then the host is
        pinged periodically until it answers again.

        @param timeout  How long the suspend is expected to last, in
                seconds.
        @param suspend_cmd  Suspend command to execute.
        @param dargs  Accepted but not used by this implementation.

        @raises error.AutoservSuspendError  If the suspend command fails,
                if the host does not answer pings once the wait is over,
                or if the whole operation finished in less than timeout
                seconds.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long, so that we can still check whether the machine
            # really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            # Keep polling until up to 60 seconds past the expected wake-up.
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Close to (or past) the expected wake-up time, poll
                    # every 5 seconds instead of every time_slice.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            # One last attempt after the polling window is exhausted.
            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        # A host that answered too early did not stay suspended for the
        # requested duration.
        if (lasted < timeout):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))
214
215    def reboot_followup(self, *args, **dargs):
216        super(RemoteHost, self).reboot_followup(*args, **dargs)
217        if self.job:
218            self.job.profilers.handle_reboot(self)
219
220
221    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
222        """
223        Wait for the host to come back from a reboot. This wraps the
224        generic wait_for_restart implementation in a reboot group.
225        """
226        def op_func():
227            # pylint: disable=missing-docstring
228            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
229        self.log_op(self.OP_REBOOT, op_func)
230
231
232    def cleanup(self):
233        super(RemoteHost, self).cleanup()
234        self.reboot()
235
236
237    def get_tmp_dir(self, parent='/tmp'):
238        """
239        Return the pathname of a directory on the host suitable
240        for temporary file storage.
241
242        The directory and its content will be deleted automatically
243        on the destruction of the Host object that was used to obtain
244        it.
245        """
246        self.run("mkdir -p %s" % parent)
247        template = os.path.join(parent, 'autoserv-XXXXXX')
248        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
249        self.tmp_dirs.append(dir_name)
250        return dir_name
251
252
253    def get_platform_label(self):
254        """
255        Return the platform label, or None if platform label is not set.
256        """
257
258        if self.job:
259            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
260                                       self.hostname)
261            keyvals = utils.read_keyval(keyval_path)
262            return keyvals.get('platform', None)
263        else:
264            return None
265
266
267    def get_all_labels(self):
268        """
269        Return all labels, or empty list if label is not set.
270        """
271        if self.job:
272            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
273                                       self.hostname)
274            keyvals = utils.read_keyval(keyval_path)
275            all_labels = keyvals.get('labels', '')
276            if all_labels:
277                all_labels = all_labels.split(',')
278                return [urllib.unquote(label) for label in all_labels]
279        return []
280
281
282    def delete_tmp_dir(self, tmpdir):
283        """
284        Delete the given temporary directory on the remote machine.
285
286        @param tmpdir The directory to delete.
287        """
288        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
289        self.tmp_dirs.remove(tmpdir)
290
291
292    def check_uptime(self):
293        """
294        Check that uptime is available and monotonically increasing.
295        """
296        if not self.is_up():
297            raise error.AutoservHostError('Client does not appear to be up')
298        result = self.run("/bin/cat /proc/uptime", 30)
299        return result.stdout.strip().split()[0]
300
301
302    def check_for_lkdtm(self):
303        """
304        Check for kernel dump test module. return True if exist.
305        """
306        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
307        return self.run(cmd, ignore_status=True).exit_status == 0
308
309
310    def are_wait_up_processes_up(self):
311        """
312        Checks if any HOSTS waitup processes are running yet on the
313        remote host.
314
315        Returns True if any the waitup processes are running, False
316        otherwise.
317        """
318        processes = self.get_wait_up_processes()
319        if len(processes) == 0:
320            return True # wait up processes aren't being used
321        for procname in processes:
322            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
323                                   ignore_status=True).exit_status
324            if exit_status == 0:
325                return True
326        return False
327
328
329    def get_labels(self):
330        """Return a list of labels for this given host.
331
332        This is the main way to retrieve all the automatic labels for a host
333        as it will run through all the currently implemented label functions.
334        """
335        labels = []
336        for label_function in self._LABEL_FUNCTIONS:
337            try:
338                label = label_function(self)
339            except Exception:
340                logging.exception('Label function %s failed; ignoring it.',
341                                  label_function.__name__)
342                label = None
343            if label:
344                if type(label) is str:
345                    labels.append(label)
346                elif type(label) is list:
347                    labels.extend(label)
348        return labels
349