• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2007 Google Inc. Released under the GPL v2
2#pylint: disable-msg=C0111
3
4import re, os, sys, traceback, time, glob, tempfile
5import logging
6from autotest_lib.server import installable_object, prebuild, utils
7from autotest_lib.client.common_lib import base_job, error, autotemp
8from autotest_lib.client.common_lib import base_packages, packages
9from autotest_lib.client.common_lib import global_config
10from autotest_lib.client.common_lib import utils as client_utils
11from autotest_lib.client.common_lib.cros.graphite import autotest_stats
12
13AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
14AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'
15
16
17get_value = global_config.global_config.get_config_value
18autoserv_prebuild = get_value('AUTOSERV', 'enable_server_prebuild',
19                              type=bool, default=False)
20
21
22class AutodirNotFoundError(Exception):
23    """No Autotest installation could be found."""
24
25
26class AutotestFailure(Exception):
27    """Gereric exception class for failures during a test run."""
28
29
30class AutotestAbort(AutotestFailure):
31    """
32    AutotestAborts are thrown when the DUT seems fine,
33    and the test doesn't give us an explicit reason for
34    failure; In this case we have no choice but to abort.
35    """
36
37
38class AutotestDeviceError(AutotestFailure):
39    """
40    Exceptions that inherit from AutotestDeviceError
41    are thrown when we can determine the current
42    state of the DUT and conclude that it probably
43    lead to the test failing; these exceptions lead
44    to failures instead of aborts.
45    """
46
47
48class AutotestDeviceNotPingable(AutotestDeviceError):
49    """Error for when a DUT becomes unpingable."""
50
51
52class AutotestDeviceNotSSHable(AutotestDeviceError):
53    """Error for when a DUT is pingable but not SSHable."""
54
55
56class AutotestDeviceRebooted(AutotestDeviceError):
57    """Error for when a DUT rebooted unexpectedly."""
58
59
60class BaseAutotest(installable_object.InstallableObject):
61    """
62    This class represents the Autotest program.
63
64    Autotest is used to run tests automatically and collect the results.
65    It also supports profilers.
66
67    Implementation details:
68    This is a leaf class in an abstract class hierarchy, it must
69    implement the unimplemented methods in parent classes.
70    """
71
72    def __init__(self, host=None):
73        self.host = host
74        self.got = False
75        self.installed = False
76        self.serverdir = utils.get_server_dir()
77        super(BaseAutotest, self).__init__()
78
79
80    install_in_tmpdir = False
81    @classmethod
82    def set_install_in_tmpdir(cls, flag):
83        """ Sets a flag that controls whether or not Autotest should by
84        default be installed in a "standard" directory (e.g.
85        /home/autotest, /usr/local/autotest) or a temporary directory. """
86        cls.install_in_tmpdir = flag
87
88
89    @classmethod
90    def get_client_autodir_paths(cls, host):
91        return global_config.global_config.get_config_value(
92                'AUTOSERV', 'client_autodir_paths', type=list)
93
94
95    @classmethod
96    def get_installed_autodir(cls, host):
97        """
98        Find where the Autotest client is installed on the host.
99        @returns an absolute path to an installed Autotest client root.
100        @raises AutodirNotFoundError if no Autotest installation can be found.
101        """
102        autodir = host.get_autodir()
103        if autodir:
104            logging.debug('Using existing host autodir: %s', autodir)
105            return autodir
106
107        for path in Autotest.get_client_autodir_paths(host):
108            try:
109                autotest_binary = os.path.join(path, 'bin', 'autotest')
110                host.run('test -x %s' % utils.sh_escape(autotest_binary))
111                host.run('test -w %s' % utils.sh_escape(path))
112                logging.debug('Found existing autodir at %s', path)
113                return path
114            except error.AutoservRunError:
115                logging.debug('%s does not exist on %s', autotest_binary,
116                              host.hostname)
117        raise AutodirNotFoundError
118
119
120    @classmethod
121    def get_install_dir(cls, host):
122        """
123        Determines the location where autotest should be installed on
124        host. If self.install_in_tmpdir is set, it will return a unique
125        temporary directory that autotest can be installed in. Otherwise, looks
126        for an existing installation to use; if none is found, looks for a
127        usable directory in the global config client_autodir_paths.
128        """
129        try:
130            install_dir = cls.get_installed_autodir(host)
131        except AutodirNotFoundError:
132            install_dir = cls._find_installable_dir(host)
133
134        if cls.install_in_tmpdir:
135            return host.get_tmp_dir(parent=install_dir)
136        return install_dir
137
138
139    @classmethod
140    def _find_installable_dir(cls, host):
141        client_autodir_paths = cls.get_client_autodir_paths(host)
142        for path in client_autodir_paths:
143            try:
144                host.run('mkdir -p %s' % utils.sh_escape(path))
145                host.run('test -w %s' % utils.sh_escape(path))
146                return path
147            except error.AutoservRunError:
148                logging.debug('Failed to create %s', path)
149        metadata = {'_type': 'AutoservInstallError',
150                    'hostname': host.hostname}
151        autotest_stats.Counter('AutoservInstallError',
152                               metadata=metadata).increment()
153        raise error.AutoservInstallError(
154                'Unable to find a place to install Autotest; tried %s' %
155                ', '.join(client_autodir_paths))
156
157
158    def get_fetch_location(self):
159        c = global_config.global_config
160        repos = c.get_config_value("PACKAGES", 'fetch_location', type=list,
161                                   default=[])
162        repos.reverse()
163        return repos
164
165
166    def install(self, host=None, autodir=None, use_packaging=True):
167        self._install(host=host, autodir=autodir, use_packaging=use_packaging)
168
169
170    def install_full_client(self, host=None, autodir=None):
171        self._install(host=host, autodir=autodir, use_autoserv=False,
172                      use_packaging=False)
173
174
175    def install_no_autoserv(self, host=None, autodir=None):
176        self._install(host=host, autodir=autodir, use_autoserv=False)
177
178
179    def _install_using_packaging(self, host, autodir):
180        repos = self.get_fetch_location()
181        if not repos:
182            raise error.PackageInstallError("No repos to install an "
183                                            "autotest client from")
184        pkgmgr = packages.PackageManager(autodir, hostname=host.hostname,
185                                         repo_urls=repos,
186                                         do_locking=False,
187                                         run_function=host.run,
188                                         run_function_dargs=dict(timeout=600))
189        # The packages dir is used to store all the packages that
190        # are fetched on that client. (for the tests,deps etc.
191        # too apart from the client)
192        pkg_dir = os.path.join(autodir, 'packages')
193        # clean up the autodir except for the packages directory
194        host.run('cd %s && ls | grep -v "^packages$"'
195                 ' | xargs rm -rf && rm -rf .[!.]*' % autodir)
196        pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir,
197                           preserve_install_dir=True)
198        self.installed = True
199
200
201    def _install_using_send_file(self, host, autodir):
202        dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers",
203                               "packages"])
204        light_files = [os.path.join(self.source_material, f)
205                       for f in os.listdir(self.source_material)
206                       if f not in dirs_to_exclude]
207        host.send_file(light_files, autodir, delete_dest=True)
208
209        # create empty dirs for all the stuff we excluded
210        commands = []
211        for path in dirs_to_exclude:
212            abs_path = os.path.join(autodir, path)
213            abs_path = utils.sh_escape(abs_path)
214            commands.append("mkdir -p '%s'" % abs_path)
215            commands.append("touch '%s'/__init__.py" % abs_path)
216        host.run(';'.join(commands))
217
218
219    def _install(self, host=None, autodir=None, use_autoserv=True,
220                 use_packaging=True):
221        """
222        Install autotest.  If get() was not called previously, an
223        attempt will be made to install from the autotest svn
224        repository.
225
226        @param host A Host instance on which autotest will be installed
227        @param autodir Location on the remote host to install to
228        @param use_autoserv Enable install modes that depend on the client
229            running with the autoserv harness
230        @param use_packaging Enable install modes that use the packaging system
231
232        @exception AutoservError if a tarball was not specified and
233            the target host does not have svn installed in its path
234        """
235        if not host:
236            host = self.host
237        if not self.got:
238            self.get()
239        host.wait_up(timeout=30)
240        host.setup()
241        logging.info("Installing autotest on %s", host.hostname)
242
243        # set up the autotest directory on the remote machine
244        if not autodir:
245            autodir = self.get_install_dir(host)
246        logging.info('Using installation dir %s', autodir)
247        host.set_autodir(autodir)
248        host.run('mkdir -p %s' % utils.sh_escape(autodir))
249
250        # make sure there are no files in $AUTODIR/results
251        results_path = os.path.join(autodir, 'results')
252        host.run('rm -rf %s/*' % utils.sh_escape(results_path),
253                 ignore_status=True)
254
255        # Fetch the autotest client from the nearest repository
256        if use_packaging:
257            try:
258                self._install_using_packaging(host, autodir)
259                logging.info("Installation of autotest completed using the "
260                             "packaging system.")
261                return
262            except (error.PackageInstallError, error.AutoservRunError,
263                    global_config.ConfigError), e:
264                logging.info("Could not install autotest using the packaging "
265                             "system: %s. Trying other methods", e)
266        else:
267            # Delete the package checksum file to force dut updating local
268            # packages.
269            command = ('rm -f "%s"' %
270                       (os.path.join(autodir, base_packages.CHECKSUM_FILE)))
271            host.run(command)
272
273        # try to install from file or directory
274        if self.source_material:
275            c = global_config.global_config
276            supports_autoserv_packaging = c.get_config_value(
277                "PACKAGES", "serve_packages_from_autoserv", type=bool)
278            # Copy autotest recursively
279            if supports_autoserv_packaging and use_autoserv:
280                self._install_using_send_file(host, autodir)
281            else:
282                host.send_file(self.source_material, autodir, delete_dest=True)
283            logging.info("Installation of autotest completed from %s",
284                         self.source_material)
285            self.installed = True
286            return
287
288        # if that fails try to install using svn
289        if utils.run('which svn').exit_status:
290            raise error.AutoservError('svn not found on target machine: %s' %
291                                      host.name)
292        try:
293            host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir))
294        except error.AutoservRunError, e:
295            host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir))
296        logging.info("Installation of autotest completed using SVN.")
297        self.installed = True
298
299
300    def uninstall(self, host=None):
301        """
302        Uninstall (i.e. delete) autotest. Removes the autotest client install
303        from the specified host.
304
305        @params host a Host instance from which the client will be removed
306        """
307        if not self.installed:
308            return
309        if not host:
310            host = self.host
311        autodir = host.get_autodir()
312        if not autodir:
313            return
314
315        # perform the actual uninstall
316        host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True)
317        host.set_autodir(None)
318        self.installed = False
319
320
321    def get(self, location=None):
322        if not location:
323            location = os.path.join(self.serverdir, '../client')
324            location = os.path.abspath(location)
325        # If there's stuff run on our client directory already, it
326        # can cause problems. Try giving it a quick clean first.
327        cwd = os.getcwd()
328        os.chdir(location)
329        try:
330            utils.system('tools/make_clean', ignore_status=True)
331        finally:
332            os.chdir(cwd)
333        super(BaseAutotest, self).get(location)
334        self.got = True
335
336
337    def run(self, control_file, results_dir='.', host=None, timeout=None,
338            tag=None, parallel_flag=False, background=False,
339            client_disconnect_timeout=None, use_packaging=True):
340        """
341        Run an autotest job on the remote machine.
342
343        @param control_file: An open file-like-obj of the control file.
344        @param results_dir: A str path where the results should be stored
345                on the local filesystem.
346        @param host: A Host instance on which the control file should
347                be run.
348        @param timeout: Maximum number of seconds to wait for the run or None.
349        @param tag: Tag name for the client side instance of autotest.
350        @param parallel_flag: Flag set when multiple jobs are run at the
351                same time.
352        @param background: Indicates that the client should be launched as
353                a background job; the code calling run will be responsible
354                for monitoring the client and collecting the results.
355        @param client_disconnect_timeout: Seconds to wait for the remote host
356                to come back after a reboot. Defaults to the host setting for
357                DEFAULT_REBOOT_TIMEOUT.
358
359        @raises AutotestRunError: If there is a problem executing
360                the control file.
361        """
362        host = self._get_host_and_setup(host, use_packaging=use_packaging)
363        results_dir = os.path.abspath(results_dir)
364
365        if client_disconnect_timeout is None:
366            client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
367
368        if tag:
369            results_dir = os.path.join(results_dir, tag)
370
371        atrun = _Run(host, results_dir, tag, parallel_flag, background)
372        self._do_run(control_file, results_dir, host, atrun, timeout,
373                     client_disconnect_timeout, use_packaging=use_packaging)
374
375
376    def _get_host_and_setup(self, host, use_packaging=True):
377        if not host:
378            host = self.host
379        if not self.installed:
380            self.install(host, use_packaging=use_packaging)
381
382        host.wait_up(timeout=30)
383        return host
384
385
386    def _do_run(self, control_file, results_dir, host, atrun, timeout,
387                client_disconnect_timeout, use_packaging=True):
388        try:
389            atrun.verify_machine()
390        except:
391            logging.error("Verify failed on %s. Reinstalling autotest",
392                          host.hostname)
393            self.install(host)
394            atrun.verify_machine()
395        debug = os.path.join(results_dir, 'debug')
396        try:
397            os.makedirs(debug)
398        except Exception:
399            pass
400
401        delete_file_list = [atrun.remote_control_file,
402                            atrun.remote_control_file + '.state',
403                            atrun.manual_control_file,
404                            atrun.manual_control_file + '.state']
405        cmd = ';'.join('rm -f ' + control for control in delete_file_list)
406        host.run(cmd, ignore_status=True)
407
408        tmppath = utils.get(control_file, local_copy=True)
409
410        # build up the initialization prologue for the control file
411        prologue_lines = []
412
413        # Add the additional user arguments
414        prologue_lines.append("args = %r\n" % self.job.args)
415
416        # If the packaging system is being used, add the repository list.
417        repos = None
418        try:
419            if use_packaging:
420                repos = self.get_fetch_location()
421                prologue_lines.append('job.add_repository(%s)\n' % repos)
422            else:
423                logging.debug('use_packaging is set to False, do not add any '
424                              'repository.')
425        except global_config.ConfigError, e:
426            # If repos is defined packaging is enabled so log the error
427            if repos:
428                logging.error(e)
429
430        # on full-size installs, turn on any profilers the server is using
431        if not atrun.background:
432            running_profilers = host.job.profilers.add_log.iteritems()
433            for profiler, (args, dargs) in running_profilers:
434                call_args = [repr(profiler)]
435                call_args += [repr(arg) for arg in args]
436                call_args += ["%s=%r" % item for item in dargs.iteritems()]
437                prologue_lines.append("job.profilers.add(%s)\n"
438                                      % ", ".join(call_args))
439        cfile = "".join(prologue_lines)
440
441        cfile += open(tmppath).read()
442        open(tmppath, "w").write(cfile)
443
444        # Create and copy state file to remote_control_file + '.state'
445        state_file = host.job.preprocess_client_state()
446        host.send_file(state_file, atrun.remote_control_file + '.init.state')
447        os.remove(state_file)
448
449        # Copy control_file to remote_control_file on the host
450        host.send_file(tmppath, atrun.remote_control_file)
451        if os.path.abspath(tmppath) != os.path.abspath(control_file):
452            os.remove(tmppath)
453
454        atrun.execute_control(
455                timeout=timeout,
456                client_disconnect_timeout=client_disconnect_timeout)
457
458
459    def run_timed_test(self, test_name, results_dir='.', host=None,
460                       timeout=None, parallel_flag=False, background=False,
461                       client_disconnect_timeout=None, *args, **dargs):
462        """
463        Assemble a tiny little control file to just run one test,
464        and run it as an autotest client-side test
465        """
466        if not host:
467            host = self.host
468        if not self.installed:
469            self.install(host)
470        opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()]
471        cmd = ", ".join([repr(test_name)] + map(repr, args) + opts)
472        control = "job.run_test(%s)\n" % cmd
473        self.run(control, results_dir, host, timeout=timeout,
474                 parallel_flag=parallel_flag, background=background,
475                 client_disconnect_timeout=client_disconnect_timeout)
476
477
478    def run_test(self, test_name, results_dir='.', host=None,
479                 parallel_flag=False, background=False,
480                 client_disconnect_timeout=None, *args, **dargs):
481        self.run_timed_test(test_name, results_dir, host, timeout=None,
482                            parallel_flag=parallel_flag, background=background,
483                            client_disconnect_timeout=client_disconnect_timeout,
484                            *args, **dargs)
485
486
487class _BaseRun(object):
488    """
489    Represents a run of autotest control file.  This class maintains
490    all the state necessary as an autotest control file is executed.
491
492    It is not intended to be used directly, rather control files
493    should be run using the run method in Autotest.
494    """
495    def __init__(self, host, results_dir, tag, parallel_flag, background):
496        self.host = host
497        self.results_dir = results_dir
498        self.env = host.env
499        self.tag = tag
500        self.parallel_flag = parallel_flag
501        self.background = background
502        self.autodir = Autotest.get_installed_autodir(self.host)
503        control = os.path.join(self.autodir, 'control')
504        if tag:
505            control += '.' + tag
506        self.manual_control_file = control
507        self.remote_control_file = control + '.autoserv'
508        self.config_file = os.path.join(self.autodir, 'global_config.ini')
509
510
511    def verify_machine(self):
512        binary = os.path.join(self.autodir, 'bin/autotest')
513        try:
514            self.host.run('ls %s > /dev/null 2>&1' % binary)
515        except:
516            raise error.AutoservInstallError(
517                "Autotest does not appear to be installed")
518
519        if not self.parallel_flag:
520            tmpdir = os.path.join(self.autodir, 'tmp')
521            download = os.path.join(self.autodir, 'tests/download')
522            self.host.run('umount %s' % tmpdir, ignore_status=True)
523            self.host.run('umount %s' % download, ignore_status=True)
524
525
526    def get_base_cmd_args(self, section):
527        args = ['--verbose']
528        if section > 0:
529            args.append('-c')
530        if self.tag:
531            args.append('-t %s' % self.tag)
532        if self.host.job.use_external_logging():
533            args.append('-l')
534        if self.host.hostname:
535            args.append('--hostname=%s' % self.host.hostname)
536        args.append('--user=%s' % self.host.job.user)
537
538        args.append(self.remote_control_file)
539        return args
540
541
542    def get_background_cmd(self, section):
543        cmd = ['nohup', os.path.join(self.autodir, 'bin/autotest_client')]
544        cmd += self.get_base_cmd_args(section)
545        cmd += ['>/dev/null', '2>/dev/null', '&']
546        return ' '.join(cmd)
547
548
549    def get_daemon_cmd(self, section, monitor_dir):
550        cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'),
551               monitor_dir, '-H autoserv']
552        cmd += self.get_base_cmd_args(section)
553        cmd += ['>/dev/null', '2>/dev/null', '&']
554        return ' '.join(cmd)
555
556
557    def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read):
558        cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'),
559               monitor_dir, str(stdout_read), str(stderr_read)]
560        return ' '.join(cmd)
561
562
563    def get_client_log(self):
564        """Find what the "next" client.* prefix should be
565
566        @returns A string of the form client.INTEGER that should be prefixed
567            to all client debug log files.
568        """
569        max_digit = -1
570        debug_dir = os.path.join(self.results_dir, 'debug')
571        client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*'))
572        for log in client_logs:
573            _, number, _ = log.split('.', 2)
574            if number.isdigit():
575                max_digit = max(max_digit, int(number))
576        return 'client.%d' % (max_digit + 1)
577
578
579    def copy_client_config_file(self, client_log_prefix=None):
580        """
581        Create and copy the client config file based on the server config.
582
583        @param client_log_prefix: Optional prefix to prepend to log files.
584        """
585        client_config_file = self._create_client_config_file(client_log_prefix)
586        self.host.send_file(client_config_file, self.config_file)
587        os.remove(client_config_file)
588
589
590    def _create_client_config_file(self, client_log_prefix=None):
591        """
592        Create a temporary file with the [CLIENT] section configuration values
593        taken from the server global_config.ini.
594
595        @param client_log_prefix: Optional prefix to prepend to log files.
596
597        @return: Path of the temporary file generated.
598        """
599        config = global_config.global_config.get_section_values('CLIENT')
600        if client_log_prefix:
601            config.set('CLIENT', 'default_logging_name', client_log_prefix)
602        return self._create_aux_file(config.write)
603
604
605    def _create_aux_file(self, func, *args):
606        """
607        Creates a temporary file and writes content to it according to a
608        content creation function. The file object is appended to *args, which
609        is then passed to the content creation function
610
611        @param func: Function that will be used to write content to the
612                temporary file.
613        @param *args: List of parameters that func takes.
614        @return: Path to the temporary file that was created.
615        """
616        fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir)
617        aux_file = os.fdopen(fd, "w")
618        try:
619            list_args = list(args)
620            list_args.append(aux_file)
621            func(*list_args)
622        finally:
623            aux_file.close()
624        return path
625
626
627    @staticmethod
628    def is_client_job_finished(last_line):
629        return bool(re.match(r'^END .*\t----\t----\t.*$', last_line))
630
631
632    @staticmethod
633    def is_client_job_rebooting(last_line):
634        return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line))
635
636
637    def _diagnose_dut(self, old_boot_id=None):
638        """
639        Run diagnostic checks on a DUT.
640
641        1. ping: A dead host will not respond to pings.
642        2. ssh (happens with 3.): DUT hangs usually fail in authentication
643            but respond to pings.
644        3. Check if a reboot occured: A healthy but unexpected reboot leaves the
645            host running with a new boot id.
646
647        This method will always raise an exception from the AutotestFailure
648        family and should only get called when the reason for a test failing
649        is ambiguous.
650
651        @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping.
652        @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT.
653        @raises AutotestDeviceRebooted: If the boot id changed.
654        @raises AutotestAbort: If none of the above exceptions were raised.
655            Since we have no recourse we must abort at this stage.
656        """
657        msg = 'Autotest client terminated unexpectedly: '
658        if utils.ping(self.host.hostname, tries=1, deadline=1) != 0:
659            msg += 'DUT is no longer pingable, it may have rebooted or hung.\n'
660            raise AutotestDeviceNotPingable(msg)
661
662        if old_boot_id:
663            try:
664                new_boot_id = self.host.get_boot_id(timeout=60)
665            except Exception as e:
666                msg += ('DUT is pingable but not SSHable, it most likely'
667                        ' sporadically rebooted during testing. %s\n' % str(e))
668                raise AutotestDeviceNotSSHable(msg)
669            else:
670                if new_boot_id != old_boot_id:
671                    msg += 'DUT rebooted during the test run.\n'
672                    raise AutotestDeviceRebooted(msg)
673
674            msg += ('DUT is pingable, SSHable and did NOT restart '
675                    'un-expectedly. We probably lost connectivity during the '
676                    'test.')
677        else:
678            msg += ('DUT is pingable, could not determine if an un-expected '
679                    'reboot occured during the test.')
680
681        raise AutotestAbort(msg)
682
683
684    def log_unexpected_abort(self, stderr_redirector, old_boot_id=None):
685        """
686        Logs that something unexpected happened, then tries to diagnose the
687        failure. The purpose of this function is only to close out the status
688        log with the appropriate error message, not to critically terminate
689        the program.
690
691        @param stderr_redirector: log stream.
692        @param old_boot_id: boot id used to infer if a reboot occured.
693        """
694        stderr_redirector.flush_all_buffers()
695        try:
696            self._diagnose_dut(old_boot_id)
697        except AutotestFailure as e:
698            self.host.job.record('END ABORT', None, None, str(e))
699
700
701    def _execute_in_background(self, section, timeout):
702        full_cmd = self.get_background_cmd(section)
703        devnull = open(os.devnull, "w")
704
705        self.copy_client_config_file(self.get_client_log())
706
707        self.host.job.push_execution_context(self.results_dir)
708        try:
709            result = self.host.run(full_cmd, ignore_status=True,
710                                   timeout=timeout,
711                                   stdout_tee=devnull,
712                                   stderr_tee=devnull)
713        finally:
714            self.host.job.pop_execution_context()
715
716        return result
717
718
719    @staticmethod
720    def _strip_stderr_prologue(stderr):
721        """Strips the 'standard' prologue that get pre-pended to every
722        remote command and returns the text that was actually written to
723        stderr by the remote command."""
724        stderr_lines = stderr.split("\n")[1:]
725        if not stderr_lines:
726            return ""
727        elif stderr_lines[0].startswith("NOTE: autotestd_monitor"):
728            del stderr_lines[0]
729        return "\n".join(stderr_lines)
730
731
732    def _execute_daemon(self, section, timeout, stderr_redirector,
733                        client_disconnect_timeout):
734        monitor_dir = self.host.get_tmp_dir()
735        daemon_cmd = self.get_daemon_cmd(section, monitor_dir)
736
737        # grab the location for the server-side client log file
738        client_log_prefix = self.get_client_log()
739        client_log_path = os.path.join(self.results_dir, 'debug',
740                                       client_log_prefix + '.log')
741        client_log = open(client_log_path, 'w', 0)
742        self.copy_client_config_file(client_log_prefix)
743
744        stdout_read = stderr_read = 0
745        self.host.job.push_execution_context(self.results_dir)
746        try:
747            self.host.run(daemon_cmd, ignore_status=True, timeout=timeout)
748            disconnect_warnings = []
749            while True:
750                monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read,
751                                                   stderr_read)
752                try:
753                    result = self.host.run(monitor_cmd, ignore_status=True,
754                                           timeout=timeout,
755                                           stdout_tee=client_log,
756                                           stderr_tee=stderr_redirector)
757                except error.AutoservRunError, e:
758                    result = e.result_obj
759                    result.exit_status = None
760                    disconnect_warnings.append(e.description)
761
762                    stderr_redirector.log_warning(
763                        "Autotest client was disconnected: %s" % e.description,
764                        "NETWORK")
765                except error.AutoservSSHTimeout:
766                    result = utils.CmdResult(monitor_cmd, "", "", None, 0)
767                    stderr_redirector.log_warning(
768                        "Attempt to connect to Autotest client timed out",
769                        "NETWORK")
770
771                stdout_read += len(result.stdout)
772                stderr_read += len(self._strip_stderr_prologue(result.stderr))
773
774                if result.exit_status is not None:
775                    # TODO (crosbug.com/38224)- sbasi: Remove extra logging.
776                    logging.debug('Result exit status is %d.',
777                                  result.exit_status)
778                    return result
779                elif not self.host.wait_up(client_disconnect_timeout):
780                    raise error.AutoservSSHTimeout(
781                        "client was disconnected, reconnect timed out")
782        finally:
783            client_log.close()
784            self.host.job.pop_execution_context()
785
786
787    def execute_section(self, section, timeout, stderr_redirector,
788                        client_disconnect_timeout):
789        logging.info("Executing %s/bin/autotest %s/control phase %d",
790                     self.autodir, self.autodir, section)
791
792        if self.background:
793            result = self._execute_in_background(section, timeout)
794        else:
795            result = self._execute_daemon(section, timeout, stderr_redirector,
796                                          client_disconnect_timeout)
797
798        last_line = stderr_redirector.last_line
799
800        # check if we failed hard enough to warrant an exception
801        if result.exit_status == 1:
802            err = error.AutotestRunError("client job was aborted")
803        elif not self.background and not result.stderr:
804            err = error.AutotestRunError(
805                "execute_section %s failed to return anything\n"
806                "stdout:%s\n" % (section, result.stdout))
807        else:
808            err = None
809
810        # log something if the client failed AND never finished logging
811        if err and not self.is_client_job_finished(last_line):
812            self.log_unexpected_abort(stderr_redirector)
813
814        if err:
815            raise err
816        else:
817            return stderr_redirector.last_line
818
819
820    def _wait_for_reboot(self, old_boot_id):
821        logging.info("Client is rebooting")
822        logging.info("Waiting for client to halt")
823        if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT,
824                                   old_boot_id=old_boot_id):
825            err = "%s failed to shutdown after %d"
826            err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT)
827            raise error.AutotestRunError(err)
828        logging.info("Client down, waiting for restart")
829        if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT):
830            # since reboot failed
831            # hardreset the machine once if possible
832            # before failing this control file
833            warning = "%s did not come back up, hard resetting"
834            warning %= self.host.hostname
835            logging.warning(warning)
836            try:
837                self.host.hardreset(wait=False)
838            except (AttributeError, error.AutoservUnsupportedError):
839                warning = "Hard reset unsupported on %s"
840                warning %= self.host.hostname
841                logging.warning(warning)
842            raise error.AutotestRunError("%s failed to boot after %ds" %
843                                         (self.host.hostname,
844                                          self.host.DEFAULT_REBOOT_TIMEOUT))
845        self.host.reboot_followup()
846
847
848    def execute_control(self, timeout=None, client_disconnect_timeout=None):
849        if not self.background:
850            collector = log_collector(self.host, self.tag, self.results_dir)
851            hostname = self.host.hostname
852            remote_results = collector.client_results_dir
853            local_results = collector.server_results_dir
854            self.host.job.add_client_log(hostname, remote_results,
855                                         local_results)
856            job_record_context = self.host.job.get_record_context()
857
858        section = 0
859        start_time = time.time()
860
861        logger = client_logger(self.host, self.tag, self.results_dir)
862        try:
863            while not timeout or time.time() < start_time + timeout:
864                if timeout:
865                    section_timeout = start_time + timeout - time.time()
866                else:
867                    section_timeout = None
868                boot_id = self.host.get_boot_id()
869                last = self.execute_section(section, section_timeout,
870                                            logger, client_disconnect_timeout)
871                if self.background:
872                    return
873                section += 1
874                if self.is_client_job_finished(last):
875                    logging.info("Client complete")
876                    return
877                elif self.is_client_job_rebooting(last):
878                    try:
879                        self._wait_for_reboot(boot_id)
880                    except error.AutotestRunError, e:
881                        self.host.job.record("ABORT", None, "reboot", str(e))
882                        self.host.job.record("END ABORT", None, None, str(e))
883                        raise
884                    continue
885
886                # If a test fails without probable cause we try to bucket it's
887                # failure into one of 2 categories. If we can determine the
888                # current state of the device and it is suspicious, we close the
889                # status lines indicating a failure. If we either cannot
890                # determine the state of the device, or it appears totally
891                # healthy, we give up and abort.
892                try:
893                    self._diagnose_dut(boot_id)
894                except AutotestDeviceError as e:
895                    # The status lines of the test are pretty much tailed to
896                    # our log, with indentation, from the client job on the DUT.
897                    # So if the DUT goes down unexpectedly we'll end up with a
898                    # malformed status log unless we manually unwind the status
899                    # stack. Ideally we would want to write a nice wrapper like
900                    # server_job methods run_reboot, run_group but they expect
901                    # reboots and we don't.
902                    self.host.job.record('FAIL', None, None, str(e))
903                    self.host.job.record('END FAIL', None, None)
904                    self.host.job.record('END GOOD', None, None)
905                    self.host.job.failed_with_device_error = True
906                    return
907                except AutotestAbort as e:
908                    self.host.job.record('ABORT', None, None, str(e))
909                    self.host.job.record('END ABORT', None, None)
910
911                    # give the client machine a chance to recover from a crash
912                    self.host.wait_up(
913                        self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
914                    msg = ("Aborting - unexpected final status message from "
915                           "client on %s: %s\n") % (self.host.hostname, last)
916                    raise error.AutotestRunError(msg)
917        finally:
918            logger.close()
919            if not self.background:
920                collector.collect_client_job_results()
921                collector.remove_redundant_client_logs()
922                state_file = os.path.basename(self.remote_control_file
923                                              + '.state')
924                state_path = os.path.join(self.results_dir, state_file)
925                self.host.job.postprocess_client_state(state_path)
926                self.host.job.remove_client_log(hostname, remote_results,
927                                                local_results)
928                job_record_context.restore()
929
930        # should only get here if we timed out
931        assert timeout
932        raise error.AutotestTimeoutError()
933
934
935class log_collector(object):
936    def __init__(self, host, client_tag, results_dir):
937        self.host = host
938        if not client_tag:
939            client_tag = "default"
940        self.client_results_dir = os.path.join(host.get_autodir(), "results",
941                                               client_tag)
942        self.server_results_dir = results_dir
943
944
945    def collect_client_job_results(self):
946        """ A method that collects all the current results of a running
947        client job into the results dir. By default does nothing as no
948        client job is running, but when running a client job you can override
949        this with something that will actually do something. """
950
951        # make an effort to wait for the machine to come up
952        try:
953            self.host.wait_up(timeout=30)
954        except error.AutoservError:
955            # don't worry about any errors, we'll try and
956            # get the results anyway
957            pass
958
959        # Copy all dirs in default to results_dir
960        timer = autotest_stats.Timer('collect_client_job_results')
961        timer.start()
962        try:
963            self.host.get_file(self.client_results_dir + '/',
964                               self.server_results_dir, preserve_symlinks=True)
965
966            # Only report time used for successful get_file calls.
967            timer.stop();
968        except Exception:
969            # well, don't stop running just because we couldn't get logs
970            e_msg = "Unexpected error copying test result logs, continuing ..."
971            logging.error(e_msg)
972            traceback.print_exc(file=sys.stdout)
973
974
975    def remove_redundant_client_logs(self):
976        """Remove client.*.log files in favour of client.*.DEBUG files."""
977        debug_dir = os.path.join(self.server_results_dir, 'debug')
978        debug_files = [f for f in os.listdir(debug_dir)
979                       if re.search(r'^client\.\d+\.DEBUG$', f)]
980        for debug_file in debug_files:
981            log_file = debug_file.replace('DEBUG', 'log')
982            log_file = os.path.join(debug_dir, log_file)
983            if os.path.exists(log_file):
984                os.remove(log_file)
985
986
987# a file-like object for catching stderr from an autotest client and
988# extracting status logs from it
989class BaseClientLogger(object):
990    """Partial file object to write to both stdout and
991    the status log file.  We only implement those methods
992    utils.run() actually calls.
993    """
994    status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$")
995    test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$")
996    fetch_package_parser = re.compile(
997        r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$")
998    extract_indent = re.compile(r"^(\t*).*$")
999    extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$")
1000
1001    def __init__(self, host, tag, server_results_dir):
1002        self.host = host
1003        self.job = host.job
1004        self.log_collector = log_collector(host, tag, server_results_dir)
1005        self.leftover = ""
1006        self.last_line = ""
1007        self.logs = {}
1008
1009
1010    def _process_log_dict(self, log_dict):
1011        log_list = log_dict.pop("logs", [])
1012        for key in sorted(log_dict.iterkeys()):
1013            log_list += self._process_log_dict(log_dict.pop(key))
1014        return log_list
1015
1016
1017    def _process_logs(self):
1018        """Go through the accumulated logs in self.log and print them
1019        out to stdout and the status log. Note that this processes
1020        logs in an ordering where:
1021
1022        1) logs to different tags are never interleaved
1023        2) logs to x.y come before logs to x.y.z for all z
1024        3) logs to x.y come before x.z whenever y < z
1025
1026        Note that this will in general not be the same as the
1027        chronological ordering of the logs. However, if a chronological
1028        ordering is desired that one can be reconstructed from the
1029        status log by looking at timestamp lines."""
1030        log_list = self._process_log_dict(self.logs)
1031        for entry in log_list:
1032            self.job.record_entry(entry, log_in_subdir=False)
1033        if log_list:
1034            self.last_line = log_list[-1].render()
1035
1036
1037    def _process_quoted_line(self, tag, line):
1038        """Process a line quoted with an AUTOTEST_STATUS flag. If the
1039        tag is blank then we want to push out all the data we've been
1040        building up in self.logs, and then the newest line. If the
1041        tag is not blank, then push the line into the logs for handling
1042        later."""
1043        entry = base_job.status_log_entry.parse(line)
1044        if entry is None:
1045            return  # the line contains no status lines
1046        if tag == "":
1047            self._process_logs()
1048            self.job.record_entry(entry, log_in_subdir=False)
1049            self.last_line = line
1050        else:
1051            tag_parts = [int(x) for x in tag.split(".")]
1052            log_dict = self.logs
1053            for part in tag_parts:
1054                log_dict = log_dict.setdefault(part, {})
1055            log_list = log_dict.setdefault("logs", [])
1056            log_list.append(entry)
1057
1058
1059    def _process_info_line(self, line):
1060        """Check if line is an INFO line, and if it is, interpret any control
1061        messages (e.g. enabling/disabling warnings) that it may contain."""
1062        match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line)
1063        if not match:
1064            return   # not an INFO line
1065        for field in match.group(1).split('\t'):
1066            if field.startswith("warnings.enable="):
1067                func = self.job.warning_manager.enable_warnings
1068            elif field.startswith("warnings.disable="):
1069                func = self.job.warning_manager.disable_warnings
1070            else:
1071                continue
1072            warning_type = field.split("=", 1)[1]
1073            func(warning_type)
1074
1075
1076    def _process_line(self, line):
1077        """Write out a line of data to the appropriate stream. Status
1078        lines sent by autotest will be prepended with
1079        "AUTOTEST_STATUS", and all other lines are ssh error
1080        messages."""
1081        status_match = self.status_parser.search(line)
1082        test_complete_match = self.test_complete_parser.search(line)
1083        fetch_package_match = self.fetch_package_parser.search(line)
1084        if status_match:
1085            tag, line = status_match.groups()
1086            self._process_info_line(line)
1087            self._process_quoted_line(tag, line)
1088        elif test_complete_match:
1089            self._process_logs()
1090            fifo_path, = test_complete_match.groups()
1091            try:
1092                self.log_collector.collect_client_job_results()
1093                self.host.run("echo A > %s" % fifo_path)
1094            except Exception:
1095                msg = "Post-test log collection failed, continuing anyway"
1096                logging.exception(msg)
1097        elif fetch_package_match:
1098            pkg_name, dest_path, fifo_path = fetch_package_match.groups()
1099            serve_packages = global_config.global_config.get_config_value(
1100                "PACKAGES", "serve_packages_from_autoserv", type=bool)
1101            if serve_packages and pkg_name.endswith(".tar.bz2"):
1102                try:
1103                    self._send_tarball(pkg_name, dest_path)
1104                except Exception:
1105                    msg = "Package tarball creation failed, continuing anyway"
1106                    logging.exception(msg)
1107            try:
1108                self.host.run("echo B > %s" % fifo_path)
1109            except Exception:
1110                msg = "Package tarball installation failed, continuing anyway"
1111                logging.exception(msg)
1112        else:
1113            logging.info(line)
1114
1115
1116    def _send_tarball(self, pkg_name, remote_dest):
1117        name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name)
1118        src_dirs = []
1119        if pkg_type == 'test':
1120            for test_dir in ['site_tests', 'tests']:
1121                src_dir = os.path.join(self.job.clientdir, test_dir, name)
1122                if os.path.exists(src_dir):
1123                    src_dirs += [src_dir]
1124                    if autoserv_prebuild:
1125                        prebuild.setup(self.job.clientdir, src_dir)
1126                    break
1127        elif pkg_type == 'profiler':
1128            src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)]
1129            if autoserv_prebuild:
1130                prebuild.setup(self.job.clientdir, src_dir)
1131        elif pkg_type == 'dep':
1132            src_dirs += [os.path.join(self.job.clientdir, 'deps', name)]
1133        elif pkg_type == 'client':
1134            return  # you must already have a client to hit this anyway
1135        else:
1136            return  # no other types are supported
1137
1138        # iterate over src_dirs until we find one that exists, then tar it
1139        for src_dir in src_dirs:
1140            if os.path.exists(src_dir):
1141                try:
1142                    logging.info('Bundling %s into %s', src_dir, pkg_name)
1143                    temp_dir = autotemp.tempdir(unique_id='autoserv-packager',
1144                                                dir=self.job.tmpdir)
1145                    tarball_path = self.job.pkgmgr.tar_package(
1146                        pkg_name, src_dir, temp_dir.name, " .")
1147                    self.host.send_file(tarball_path, remote_dest)
1148                finally:
1149                    temp_dir.clean()
1150                return
1151
1152
1153    def log_warning(self, msg, warning_type):
1154        """Injects a WARN message into the current status logging stream."""
1155        timestamp = int(time.time())
1156        if self.job.warning_manager.is_valid(timestamp, warning_type):
1157            self.job.record('WARN', None, None, msg)
1158
1159
1160    def write(self, data):
1161        # now start processing the existing buffer and the new data
1162        data = self.leftover + data
1163        lines = data.split('\n')
1164        processed_lines = 0
1165        try:
1166            # process all the buffered data except the last line
1167            # ignore the last line since we may not have all of it yet
1168            for line in lines[:-1]:
1169                self._process_line(line)
1170                processed_lines += 1
1171        finally:
1172            # save any unprocessed lines for future processing
1173            self.leftover = '\n'.join(lines[processed_lines:])
1174
1175
1176    def flush(self):
1177        sys.stdout.flush()
1178
1179
1180    def flush_all_buffers(self):
1181        if self.leftover:
1182            self._process_line(self.leftover)
1183            self.leftover = ""
1184        self._process_logs()
1185        self.flush()
1186
1187
1188    def close(self):
1189        self.flush_all_buffers()
1190
1191
1192SiteAutotest = client_utils.import_site_class(
1193    __file__, "autotest_lib.server.site_autotest", "SiteAutotest",
1194    BaseAutotest)
1195
1196
1197_SiteRun = client_utils.import_site_class(
1198    __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun)
1199
1200
1201SiteClientLogger = client_utils.import_site_class(
1202    __file__, "autotest_lib.server.site_autotest", "SiteClientLogger",
1203    BaseClientLogger)
1204
1205
1206class Autotest(SiteAutotest):
1207    pass
1208
1209
1210class client_logger(SiteClientLogger):
1211    pass
1212
1213
1214class _Run(_SiteRun):
1215    pass
1216