• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Lint as: python2, python3
2"""This class defines the Remote host class."""
3
4from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
7import os, logging, time
8import six
9from six.moves import urllib
10import re
11from autotest_lib.client.common_lib import error
12from autotest_lib.server import utils
13from autotest_lib.server.hosts import base_classes
14
15
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    # Default timeout for reboot() and wait_for_restart(); taken unchanged
    # from the base Host class.
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default for halt()'s timeout parameter, in seconds.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables run by get_labels(); each one is called with the host object.
    _LABEL_FUNCTIONS = []
    # NOTE(review): not referenced in this part of the file -- presumably
    # consumed by subclasses or label tooling; confirm before changing.
    _DETECTABLE_LABELS = []

    # Path on the remote host where job_start() copies /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    # mktemp template used by get_tmp_dir(); delete_all_tmp_dirs() treats
    # every run of three or more X characters as the random part.
    TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'
39
40
41    def _initialize(self, hostname, autodir=None, *args, **dargs):
42        super(RemoteHost, self)._initialize(*args, **dargs)
43
44        self.hostname = hostname
45        self.autodir = autodir
46        self.tmp_dirs = []
47
48
49    def __repr__(self):
50        return "<remote host: %s>" % self.hostname
51
52
53    def close(self):
54        # pylint: disable=missing-docstring
55        super(RemoteHost, self).close()
56        self.stop_loggers()
57
58        if hasattr(self, 'tmp_dirs'):
59            for dir in self.tmp_dirs:
60                try:
61                    self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
62                except error.AutoservRunError:
63                    pass
64
65
66    def job_start(self):
67        """
68        Abstract method, called the first time a remote host object
69        is created for a specific host after a job starts.
70
71        This method depends on the create_host factory being used to
72        construct your host object. If you directly construct host objects
73        you will need to call this method yourself (and enforce the
74        single-call rule).
75        """
76        try:
77            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
78                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
79            self.run(cmd)
80        except Exception as e:
81            # Non-fatal error
82            logging.info('Failed to copy /var/log/messages at startup: %s', e)
83
84
85    def get_autodir(self):
86        return self.autodir
87
88
89    def set_autodir(self, autodir):
90        """
91        This method is called to make the host object aware of the
92        where autotest is installed. Called in server/autotest.py
93        after a successful install
94        """
95        self.autodir = autodir
96
97
98    def sysrq_reboot(self):
99        # pylint: disable=missing-docstring
100        self.run_background('echo b > /proc/sysrq-trigger')
101
102
103    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
104        """
105        Shut down the remote host.
106
107        N.B.  This method makes no provision to bring the target back
108        up.  The target will be offline indefinitely if there's no
109        independent hardware (servo, RPM, etc.) to force the target to
110        power on.
111
112        @param timeout  Maximum time to wait for host down, in seconds.
113        @param wait  Whether to wait for the host to go offline.
114        """
115        self.run_background('sleep 1 ; halt')
116        if wait:
117            self.wait_down(timeout=timeout)
118
119
120    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
121               fastsync=False, reboot_cmd=None, **dargs):
122        """
123        Reboot the remote host.
124
125        Args:
126                timeout - How long to wait for the reboot.
127                wait - Should we wait to see if the machine comes back up.
128                       If this is set to True, ignores reboot_cmd's error
129                       even if occurs.
130                fastsync - Don't wait for the sync to complete, just start one
131                        and move on. This is for cases where rebooting prompty
132                        is more important than data integrity and/or the
133                        machine may have disks that cause sync to never return.
134                reboot_cmd - Reboot command to execute.
135        """
136        self.reboot_setup(**dargs)
137        if not reboot_cmd:
138            reboot_cmd = ('sync & sleep 5; '
139                          'reboot & sleep 60; '
140                          'reboot -f & sleep 10; '
141                          'reboot -nf & sleep 10; '
142                          'telinit 6')
143
144        def reboot():
145            # pylint: disable=missing-docstring
146            self.record("GOOD", None, "reboot.start")
147            current_boot_id = None
148            try:
149                current_boot_id = self.get_boot_id()
150
151                # sync before starting the reboot, so that a long sync during
152                # shutdown isn't timed out by wait_down's short timeout
153                if not fastsync:
154                    self.run('sync; sync', timeout=timeout, ignore_status=True)
155
156                self.run_background(reboot_cmd)
157            except error.AutoservRunError:
158                # If wait is set, ignore the error here, and rely on the
159                # wait_for_restart() for stability, instead.
160                # reboot_cmd sometimes causes an error even if reboot is
161                # successfully in progress. This is difficult to be avoided,
162                # because we have no much control on remote machine after
163                # "reboot" starts.
164                if not wait or current_boot_id is None:
165                    # TODO(b/37652392): Revisit no-wait case, later.
166                    self.record("ABORT", None, "reboot.start",
167                                "reboot command failed")
168                    raise
169            if wait:
170                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
171                                      **dargs)
172
173        # if this is a full reboot-and-wait, run the reboot inside a group
174        if wait:
175            self.log_op(self.OP_REBOOT, reboot)
176        else:
177            reboot()
178
179    def suspend(self, timeout, suspend_cmd,
180                allow_early_resume=False):
181        """
182        Suspend the remote host.
183
184        Args:
185                timeout - How long to wait for the suspend in integer seconds.
186                suspend_cmd - suspend command to execute.
187                allow_early_resume - Boolean that indicate whether resume
188                                     before |timeout| is ok.
189        Raises:
190                error.AutoservSuspendError - If |allow_early_resume| is False
191                                             and if device resumes before
192                                             |timeout|.
193        """
194        # define a function for the supend and run it in a group
195        def suspend():
196            # pylint: disable=missing-docstring
197            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
198            try:
199                self.run_background(suspend_cmd)
200            except error.AutoservRunError:
201                self.record("ABORT", None, "suspend.start",
202                            "suspend command failed")
203                raise error.AutoservSuspendError("suspend command failed")
204
205            # Wait for some time, to ensure the machine is going to sleep.
206            # Not too long to check if the machine really suspended.
207            time_slice = min(timeout / 2, 300)
208            time.sleep(time_slice)
209            time_counter = time_slice
210            while time_counter < timeout + 60:
211                # Check if the machine is back. We check regularely to
212                # ensure the machine was suspended long enough.
213                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
214                    return
215                else:
216                    if time_counter > timeout - 10:
217                        time_slice = 5
218                    time.sleep(time_slice)
219                    time_counter += time_slice
220
221            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
222                raise error.AutoservSuspendError(
223                    "DUT is not responding after %d seconds" % (time_counter))
224
225        start_time = time.time()
226        self.log_op(self.OP_SUSPEND, suspend)
227        lasted = time.time() - start_time
228        logging.info("Device resumed after %d secs", lasted)
229        if (lasted < timeout and not allow_early_resume):
230            raise error.AutoservSuspendError(
231                "Suspend did not last long enough: %d instead of %d" % (
232                    lasted, timeout))
233
234    def reboot_followup(self, *args, **dargs):
235        # pylint: disable=missing-docstring
236        super(RemoteHost, self).reboot_followup(*args, **dargs)
237        if self.job:
238            self.job.profilers.handle_reboot(self)
239
240
241    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
242        """
243        Wait for the host to come back from a reboot. This wraps the
244        generic wait_for_restart implementation in a reboot group.
245        """
246        def op_func():
247            # pylint: disable=missing-docstring
248            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
249        self.log_op(self.OP_REBOOT, op_func)
250
251
252    def cleanup(self):
253        # pylint: disable=missing-docstring
254        super(RemoteHost, self).cleanup()
255        self.reboot()
256
257
258    def get_tmp_dir(self, parent='/tmp'):
259        """
260        Return the pathname of a directory on the host suitable
261        for temporary file storage.
262
263        The directory and its content will be deleted automatically
264        on the destruction of the Host object that was used to obtain
265        it.
266        """
267        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
268        dir_name = self.run('mkdir -p %s && mktemp -d %s' % (parent, template)).stdout.rstrip()
269        self.tmp_dirs.append(dir_name)
270        return dir_name
271
272
273    def get_platform_label(self):
274        """
275        Return the platform label, or None if platform label is not set.
276        """
277
278        if self.job:
279            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
280                                       self.hostname)
281            keyvals = utils.read_keyval(keyval_path)
282            return keyvals.get('platform', None)
283        else:
284            return None
285
286
287    def get_all_labels(self):
288        """
289        Return all labels, or empty list if label is not set.
290        """
291        if self.job:
292            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
293                                       self.hostname)
294            keyvals = utils.read_keyval(keyval_path)
295            all_labels = keyvals.get('labels', '')
296            if all_labels:
297                all_labels = all_labels.split(',')
298                return [urllib.parse.unquote(label) for label in all_labels]
299        return []
300
301
302    def delete_tmp_dir(self, tmpdir):
303        """
304        Delete the given temporary directory on the remote machine.
305
306        @param tmpdir The directory to delete.
307        """
308        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
309        self.tmp_dirs.remove(tmpdir)
310
311
312    def delete_all_tmp_dirs(self, parent='/tmp'):
313        """
314        Delete all directories in parent that were created by get_tmp_dir
315
316        Note that this may involve deleting directories created by calls to
317        get_tmp_dir on a different RemoteHost instance than the one running this
318        method. Only perform this operation when certain that this will not
319        cause unexpected behavior.
320        """
321        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
322        if isinstance(parent, (list, tuple)):
323            parents = parent
324        else:
325            parents = [parent]
326        rm_paths = []
327        for parent in parents:
328            base_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE)
329            # distinguish between non-wildcard asterisks in parent directory name
330            # and wildcards inserted from the template
331            base = '*'.join(
332                ['"%s"' % utils.sh_escape(x) for x in base_template.split('*')])
333            path = '"%s' % os.path.join(utils.sh_escape(parent), base[1:])
334            rm_paths.append(path)
335            # remove deleted directories from tmp_dirs
336            regex = os.path.join(parent, re.sub('(XXXX*)',
337                            lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
338                            self.TMP_DIR_TEMPLATE))
339            regex += '(/|$)' # remove if matches, or is within a dir that matches
340            self.tmp_dirs = [x for x in self.tmp_dirs if not re.match(regex, x)]
341
342        self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)
343
344    def check_uptime(self):
345        """
346        Check that uptime is available and monotonically increasing.
347        """
348        if not self.is_up():
349            raise error.AutoservHostError('Client does not appear to be up')
350        result = self.run("/bin/cat /proc/uptime", 30)
351        return result.stdout.strip().split()[0]
352
353
354    def check_for_lkdtm(self):
355        """
356        Check for kernel dump test module. return True if exist.
357        """
358        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
359        return self.run(cmd, ignore_status=True).exit_status == 0
360
361
362    def are_wait_up_processes_up(self):
363        """
364        Checks if any HOSTS waitup processes are running yet on the
365        remote host.
366
367        Returns True if any the waitup processes are running, False
368        otherwise.
369        """
370        processes = self.get_wait_up_processes()
371        if len(processes) == 0:
372            return True # wait up processes aren't being used
373        for procname in processes:
374            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
375                                   ignore_status=True).exit_status
376            if exit_status == 0:
377                return True
378        return False
379
380
381    def get_labels(self):
382        """Return a list of labels for this given host.
383
384        This is the main way to retrieve all the automatic labels for a host
385        as it will run through all the currently implemented label functions.
386        """
387        labels = []
388        for label_function in self._LABEL_FUNCTIONS:
389            try:
390                label = label_function(self)
391            except Exception:
392                logging.exception('Label function %s failed; ignoring it.',
393                                  label_function.__name__)
394                label = None
395            if label:
396                if type(label) is str:
397                    labels.append(label)
398                elif type(label) is list:
399                    labels.extend(label)
400        return labels
401
402    def get_result_dir(self):
403        """Return the result directory path if passed or None if not.
404
405        @return string
406        """
407        if self.job and hasattr(self.job, 'resultdir'):
408            return self.job.resultdir
409        return None
410