• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""This class defines the Remote host class."""
2
3import os, logging, urllib, time
4import re
5from autotest_lib.client.common_lib import error
6from autotest_lib.server import utils
7from autotest_lib.server.hosts import base_classes
8
9
10class RemoteHost(base_classes.Host):
11    """
12    This class represents a remote machine on which you can run
13    programs.
14
15    It may be accessed through a network, a serial line, ...
16    It is not the machine autoserv is running on.
17
18    Implementation details:
19    This is an abstract class, leaf subclasses must implement the methods
20    listed here and in parent classes which have no implementation. They
21    may reimplement methods which already have an implementation. You
22    must not instantiate this class but should instantiate one of those
23    leaf subclasses.
24    """
25
    # Default reboot timeout, taken unchanged from the parent Host class.
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default `timeout` of halt(): seconds to wait for the host to go down.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables that get_labels() invokes with the host as only argument.
    _LABEL_FUNCTIONS = []
    # NOTE(review): not referenced in the visible part of this file;
    # presumably filled in by subclasses -- confirm against callers.
    _DETECTABLE_LABELS = []

    # Path on the remote host where job_start() saves a copy of
    # /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    # Template handed to `mktemp -d` by get_tmp_dir(); delete_all_tmp_dirs()
    # treats each run of three or more Xs as the random part.
    TMP_DIR_TEMPLATE = 'autoserv-XXXXXX'
33
34
35    def _initialize(self, hostname, autodir=None, *args, **dargs):
36        super(RemoteHost, self)._initialize(*args, **dargs)
37
38        self.hostname = hostname
39        self.autodir = autodir
40        self.tmp_dirs = []
41
42
43    def __repr__(self):
44        return "<remote host: %s>" % self.hostname
45
46
47    def close(self):
48        # pylint: disable=missing-docstring
49        super(RemoteHost, self).close()
50        self.stop_loggers()
51
52        if hasattr(self, 'tmp_dirs'):
53            for dir in self.tmp_dirs:
54                try:
55                    self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
56                except error.AutoservRunError:
57                    pass
58
59
60    def job_start(self):
61        """
62        Abstract method, called the first time a remote host object
63        is created for a specific host after a job starts.
64
65        This method depends on the create_host factory being used to
66        construct your host object. If you directly construct host objects
67        you will need to call this method yourself (and enforce the
68        single-call rule).
69        """
70        try:
71            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
72                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
73            self.run(cmd)
74        except Exception, e:
75            # Non-fatal error
76            logging.info('Failed to copy /var/log/messages at startup: %s', e)
77
78
79    def get_autodir(self):
80        return self.autodir
81
82
83    def set_autodir(self, autodir):
84        """
85        This method is called to make the host object aware of the
86        where autotest is installed. Called in server/autotest.py
87        after a successful install
88        """
89        self.autodir = autodir
90
91
92    def sysrq_reboot(self):
93        # pylint: disable=missing-docstring
94        self.run_background('echo b > /proc/sysrq-trigger')
95
96
97    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
98        """
99        Shut down the remote host.
100
101        N.B.  This method makes no provision to bring the target back
102        up.  The target will be offline indefinitely if there's no
103        independent hardware (servo, RPM, etc.) to force the target to
104        power on.
105
106        @param timeout  Maximum time to wait for host down, in seconds.
107        @param wait  Whether to wait for the host to go offline.
108        """
109        self.run_background('sleep 1 ; halt')
110        if wait:
111            self.wait_down(timeout=timeout)
112
113
114    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
115               fastsync=False, reboot_cmd=None, **dargs):
116        """
117        Reboot the remote host.
118
119        Args:
120                timeout - How long to wait for the reboot.
121                wait - Should we wait to see if the machine comes back up.
122                       If this is set to True, ignores reboot_cmd's error
123                       even if occurs.
124                fastsync - Don't wait for the sync to complete, just start one
125                        and move on. This is for cases where rebooting prompty
126                        is more important than data integrity and/or the
127                        machine may have disks that cause sync to never return.
128                reboot_cmd - Reboot command to execute.
129        """
130        self.reboot_setup(**dargs)
131        if not reboot_cmd:
132            reboot_cmd = ('sync & sleep 5; '
133                          'reboot & sleep 60; '
134                          'reboot -f & sleep 10; '
135                          'reboot -nf & sleep 10; '
136                          'telinit 6')
137
138        def reboot():
139            # pylint: disable=missing-docstring
140            self.record("GOOD", None, "reboot.start")
141            current_boot_id = None
142            try:
143                current_boot_id = self.get_boot_id()
144
145                # sync before starting the reboot, so that a long sync during
146                # shutdown isn't timed out by wait_down's short timeout
147                if not fastsync:
148                    self.run('sync; sync', timeout=timeout, ignore_status=True)
149
150                self.run_background(reboot_cmd)
151            except error.AutoservRunError:
152                # If wait is set, ignore the error here, and rely on the
153                # wait_for_restart() for stability, instead.
154                # reboot_cmd sometimes causes an error even if reboot is
155                # successfully in progress. This is difficult to be avoided,
156                # because we have no much control on remote machine after
157                # "reboot" starts.
158                if not wait or current_boot_id is None:
159                    # TODO(b/37652392): Revisit no-wait case, later.
160                    self.record("ABORT", None, "reboot.start",
161                                "reboot command failed")
162                    raise
163            if wait:
164                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
165                                      **dargs)
166
167        # if this is a full reboot-and-wait, run the reboot inside a group
168        if wait:
169            self.log_op(self.OP_REBOOT, reboot)
170        else:
171            reboot()
172
173    def suspend(self, timeout, suspend_cmd,
174                allow_early_resume=False):
175        """
176        Suspend the remote host.
177
178        Args:
179                timeout - How long to wait for the suspend in integer seconds.
180                suspend_cmd - suspend command to execute.
181                allow_early_resume - Boolean that indicate whether resume
182                                     before |timeout| is ok.
183        Raises:
184                error.AutoservSuspendError - If |allow_early_resume| is False
185                                             and if device resumes before
186                                             |timeout|.
187        """
188        # define a function for the supend and run it in a group
189        def suspend():
190            # pylint: disable=missing-docstring
191            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
192            try:
193                self.run_background(suspend_cmd)
194            except error.AutoservRunError:
195                self.record("ABORT", None, "suspend.start",
196                            "suspend command failed")
197                raise error.AutoservSuspendError("suspend command failed")
198
199            # Wait for some time, to ensure the machine is going to sleep.
200            # Not too long to check if the machine really suspended.
201            time_slice = min(timeout / 2, 300)
202            time.sleep(time_slice)
203            time_counter = time_slice
204            while time_counter < timeout + 60:
205                # Check if the machine is back. We check regularely to
206                # ensure the machine was suspended long enough.
207                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
208                    return
209                else:
210                    if time_counter > timeout - 10:
211                        time_slice = 5
212                    time.sleep(time_slice)
213                    time_counter += time_slice
214
215            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
216                raise error.AutoservSuspendError(
217                    "DUT is not responding after %d seconds" % (time_counter))
218
219        start_time = time.time()
220        self.log_op(self.OP_SUSPEND, suspend)
221        lasted = time.time() - start_time
222        logging.info("Device resumed after %d secs", lasted)
223        if (lasted < timeout and not allow_early_resume):
224            raise error.AutoservSuspendError(
225                "Suspend did not last long enough: %d instead of %d" % (
226                    lasted, timeout))
227
228    def reboot_followup(self, *args, **dargs):
229        # pylint: disable=missing-docstring
230        super(RemoteHost, self).reboot_followup(*args, **dargs)
231        if self.job:
232            self.job.profilers.handle_reboot(self)
233
234
235    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
236        """
237        Wait for the host to come back from a reboot. This wraps the
238        generic wait_for_restart implementation in a reboot group.
239        """
240        def op_func():
241            # pylint: disable=missing-docstring
242            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
243        self.log_op(self.OP_REBOOT, op_func)
244
245
246    def cleanup(self):
247        # pylint: disable=missing-docstring
248        super(RemoteHost, self).cleanup()
249        self.reboot()
250
251
252    def get_tmp_dir(self, parent='/tmp'):
253        """
254        Return the pathname of a directory on the host suitable
255        for temporary file storage.
256
257        The directory and its content will be deleted automatically
258        on the destruction of the Host object that was used to obtain
259        it.
260        """
261        self.run("mkdir -p %s" % parent)
262        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
263        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
264        self.tmp_dirs.append(dir_name)
265        return dir_name
266
267
268    def get_platform_label(self):
269        """
270        Return the platform label, or None if platform label is not set.
271        """
272
273        if self.job:
274            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
275                                       self.hostname)
276            keyvals = utils.read_keyval(keyval_path)
277            return keyvals.get('platform', None)
278        else:
279            return None
280
281
282    def get_all_labels(self):
283        """
284        Return all labels, or empty list if label is not set.
285        """
286        if self.job:
287            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
288                                       self.hostname)
289            keyvals = utils.read_keyval(keyval_path)
290            all_labels = keyvals.get('labels', '')
291            if all_labels:
292                all_labels = all_labels.split(',')
293                return [urllib.unquote(label) for label in all_labels]
294        return []
295
296
297    def delete_tmp_dir(self, tmpdir):
298        """
299        Delete the given temporary directory on the remote machine.
300
301        @param tmpdir The directory to delete.
302        """
303        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
304        self.tmp_dirs.remove(tmpdir)
305
306
307    def delete_all_tmp_dirs(self, parent='/tmp'):
308        """
309        Delete all directories in parent that were created by get_tmp_dir
310
311        Note that this may involve deleting directories created by calls to
312        get_tmp_dir on a different RemoteHost instance than the one running this
313        method. Only perform this operation when certain that this will not
314        cause unexpected behavior.
315        """
316        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
317        base_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE)
318        # distinguish between non-wildcard asterisks in parent directory name
319        # and wildcards inserted from the template
320        base = '*'.join(map(lambda x: '"%s"' % utils.sh_escape(x),
321                base_template.split('*')))
322        path = '"%s' % os.path.join(utils.sh_escape(parent), base[1:])
323        self.run('rm -rf %s' % path, ignore_status=True)
324        # remove deleted directories from tmp_dirs
325        regex = os.path.join(parent, re.sub('(XXXX*)',
326                        lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
327                        self.TMP_DIR_TEMPLATE))
328        regex += '(/|$)' # remove if matches, or is within a dir that matches
329        self.tmp_dirs = filter(lambda x: not re.match(regex, x), self.tmp_dirs)
330
331
332    def check_uptime(self):
333        """
334        Check that uptime is available and monotonically increasing.
335        """
336        if not self.is_up():
337            raise error.AutoservHostError('Client does not appear to be up')
338        result = self.run("/bin/cat /proc/uptime", 30)
339        return result.stdout.strip().split()[0]
340
341
342    def check_for_lkdtm(self):
343        """
344        Check for kernel dump test module. return True if exist.
345        """
346        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
347        return self.run(cmd, ignore_status=True).exit_status == 0
348
349
350    def are_wait_up_processes_up(self):
351        """
352        Checks if any HOSTS waitup processes are running yet on the
353        remote host.
354
355        Returns True if any the waitup processes are running, False
356        otherwise.
357        """
358        processes = self.get_wait_up_processes()
359        if len(processes) == 0:
360            return True # wait up processes aren't being used
361        for procname in processes:
362            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
363                                   ignore_status=True).exit_status
364            if exit_status == 0:
365                return True
366        return False
367
368
369    def get_labels(self):
370        """Return a list of labels for this given host.
371
372        This is the main way to retrieve all the automatic labels for a host
373        as it will run through all the currently implemented label functions.
374        """
375        labels = []
376        for label_function in self._LABEL_FUNCTIONS:
377            try:
378                label = label_function(self)
379            except Exception:
380                logging.exception('Label function %s failed; ignoring it.',
381                                  label_function.__name__)
382                label = None
383            if label:
384                if type(label) is str:
385                    labels.append(label)
386                elif type(label) is list:
387                    labels.extend(label)
388        return labels
389