• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2007 Google Inc. Released under the GPL v2
2#pylint: disable-msg=C0111
3
4import glob
5import logging
6import os
7import re
8import sys
9import tempfile
10import time
11import traceback
12
13import common
14from autotest_lib.client.bin.result_tools import runner as result_tools_runner
15from autotest_lib.client.common_lib import autotemp
16from autotest_lib.client.common_lib import base_job
17from autotest_lib.client.common_lib import error
18from autotest_lib.client.common_lib import packages
19from autotest_lib.client.common_lib import global_config
20from autotest_lib.client.common_lib import utils as client_utils
21from autotest_lib.server import installable_object
22from autotest_lib.server import prebuild
23from autotest_lib.server import utils
24
25try:
26    from chromite.lib import metrics
27except ImportError:
28    metrics = client_utils.metrics_mock
29
30
31AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
32AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'
33
34CONFIG = global_config.global_config
35AUTOSERV_PREBUILD = CONFIG.get_config_value(
36        'AUTOSERV', 'enable_server_prebuild', type=bool, default=False)
37
38ENABLE_RESULT_THROTTLING = CONFIG.get_config_value(
39        'AUTOSERV', 'enable_result_throttling', type=bool, default=False)
40
41class AutodirNotFoundError(Exception):
42    """No Autotest installation could be found."""
43
44
45class AutotestFailure(Exception):
46    """Gereric exception class for failures during a test run."""
47
48
49class AutotestAbort(AutotestFailure):
50    """
51    AutotestAborts are thrown when the DUT seems fine,
52    and the test doesn't give us an explicit reason for
53    failure; In this case we have no choice but to abort.
54    """
55
56
57class AutotestDeviceError(AutotestFailure):
58    """
59    Exceptions that inherit from AutotestDeviceError
60    are thrown when we can determine the current
61    state of the DUT and conclude that it probably
62    lead to the test failing; these exceptions lead
63    to failures instead of aborts.
64    """
65
66
67class AutotestDeviceNotPingable(AutotestDeviceError):
68    """Error for when a DUT becomes unpingable."""
69
70
71class AutotestDeviceNotSSHable(AutotestDeviceError):
72    """Error for when a DUT is pingable but not SSHable."""
73
74
75class AutotestDeviceRebooted(AutotestDeviceError):
76    """Error for when a DUT rebooted unexpectedly."""
77
78
79class BaseAutotest(installable_object.InstallableObject):
80    """
81    This class represents the Autotest program.
82
83    Autotest is used to run tests automatically and collect the results.
84    It also supports profilers.
85
86    Implementation details:
87    This is a leaf class in an abstract class hierarchy, it must
88    implement the unimplemented methods in parent classes.
89    """
90
91    def __init__(self, host=None):
92        self.host = host
93        self.got = False
94        self.installed = False
95        self.serverdir = utils.get_server_dir()
96        super(BaseAutotest, self).__init__()
97
98
99    install_in_tmpdir = False
100    @classmethod
101    def set_install_in_tmpdir(cls, flag):
102        """ Sets a flag that controls whether or not Autotest should by
103        default be installed in a "standard" directory (e.g.
104        /home/autotest, /usr/local/autotest) or a temporary directory. """
105        cls.install_in_tmpdir = flag
106
107
108    @classmethod
109    def get_client_autodir_paths(cls, host):
110        return global_config.global_config.get_config_value(
111                'AUTOSERV', 'client_autodir_paths', type=list)
112
113
114    @classmethod
115    def get_installed_autodir(cls, host):
116        """
117        Find where the Autotest client is installed on the host.
118        @returns an absolute path to an installed Autotest client root.
119        @raises AutodirNotFoundError if no Autotest installation can be found.
120        """
121        autodir = host.get_autodir()
122        if autodir:
123            logging.debug('Using existing host autodir: %s', autodir)
124            return autodir
125
126        for path in Autotest.get_client_autodir_paths(host):
127            try:
128                autotest_binary = os.path.join(path, 'bin', 'autotest')
129                host.run('test -x %s' % utils.sh_escape(autotest_binary))
130                host.run('test -w %s' % utils.sh_escape(path))
131                logging.debug('Found existing autodir at %s', path)
132                return path
133            except error.GenericHostRunError:
134                logging.debug('%s does not exist on %s', autotest_binary,
135                              host.hostname)
136        raise AutodirNotFoundError
137
138
139    @classmethod
140    def get_install_dir(cls, host):
141        """
142        Determines the location where autotest should be installed on
143        host. If self.install_in_tmpdir is set, it will return a unique
144        temporary directory that autotest can be installed in. Otherwise, looks
145        for an existing installation to use; if none is found, looks for a
146        usable directory in the global config client_autodir_paths.
147        """
148        try:
149            install_dir = cls.get_installed_autodir(host)
150        except AutodirNotFoundError:
151            install_dir = cls._find_installable_dir(host)
152
153        if cls.install_in_tmpdir:
154            return host.get_tmp_dir(parent=install_dir)
155        return install_dir
156
157
158    @classmethod
159    def _find_installable_dir(cls, host):
160        client_autodir_paths = cls.get_client_autodir_paths(host)
161        for path in client_autodir_paths:
162            try:
163                host.run('mkdir -p %s' % utils.sh_escape(path))
164                host.run('test -w %s' % utils.sh_escape(path))
165                return path
166            except error.AutoservRunError:
167                logging.debug('Failed to create %s', path)
168        metrics.Counter(
169            'chromeos/autotest/errors/no_autotest_install_path').increment(
170                fields={'dut_host_name': host.hostname})
171        raise error.AutoservInstallError(
172                'Unable to find a place to install Autotest; tried %s' %
173                ', '.join(client_autodir_paths))
174
175
176    def get_fetch_location(self):
177        c = global_config.global_config
178        repos = c.get_config_value("PACKAGES", 'fetch_location', type=list,
179                                   default=[])
180        repos.reverse()
181        return repos
182
183
184    def install(self, host=None, autodir=None, use_packaging=True):
185        self._install(host=host, autodir=autodir, use_packaging=use_packaging)
186
187
188    def install_full_client(self, host=None, autodir=None):
189        self._install(host=host, autodir=autodir, use_autoserv=False,
190                      use_packaging=False)
191
192
193    def install_no_autoserv(self, host=None, autodir=None):
194        self._install(host=host, autodir=autodir, use_autoserv=False)
195
196
197    def _install_using_packaging(self, host, autodir):
198        repos = self.get_fetch_location()
199        if not repos:
200            raise error.PackageInstallError("No repos to install an "
201                                            "autotest client from")
202        pkgmgr = packages.PackageManager(autodir, hostname=host.hostname,
203                                         repo_urls=repos,
204                                         do_locking=False,
205                                         run_function=host.run,
206                                         run_function_dargs=dict(timeout=600))
207        # The packages dir is used to store all the packages that
208        # are fetched on that client. (for the tests,deps etc.
209        # too apart from the client)
210        pkg_dir = os.path.join(autodir, 'packages')
211        # clean up the autodir except for the packages directory
212        host.run('cd %s && ls | grep -v "^packages$"'
213                 ' | xargs rm -rf && rm -rf .[!.]*' % autodir)
214        pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir,
215                           preserve_install_dir=True)
216        self.installed = True
217
218
219    def _install_using_send_file(self, host, autodir):
220        dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers",
221                               "packages"])
222        light_files = [os.path.join(self.source_material, f)
223                       for f in os.listdir(self.source_material)
224                       if f not in dirs_to_exclude]
225        host.send_file(light_files, autodir, delete_dest=True)
226
227        # create empty dirs for all the stuff we excluded
228        commands = []
229        for path in dirs_to_exclude:
230            abs_path = os.path.join(autodir, path)
231            abs_path = utils.sh_escape(abs_path)
232            commands.append("mkdir -p '%s'" % abs_path)
233            commands.append("touch '%s'/__init__.py" % abs_path)
234        host.run(';'.join(commands))
235
236
237    def _install(self, host=None, autodir=None, use_autoserv=True,
238                 use_packaging=True):
239        """
240        Install autotest.  If get() was not called previously, an
241        attempt will be made to install from the autotest svn
242        repository.
243
244        @param host A Host instance on which autotest will be installed
245        @param autodir Location on the remote host to install to
246        @param use_autoserv Enable install modes that depend on the client
247            running with the autoserv harness
248        @param use_packaging Enable install modes that use the packaging system
249
250        @exception AutoservError if a tarball was not specified and
251            the target host does not have svn installed in its path
252        """
253        if not host:
254            host = self.host
255        if not self.got:
256            self.get()
257        host.wait_up(timeout=30)
258        host.setup()
259        logging.info("Installing autotest on %s", host.hostname)
260
261        # set up the autotest directory on the remote machine
262        if not autodir:
263            autodir = self.get_install_dir(host)
264        logging.info('Using installation dir %s', autodir)
265        host.set_autodir(autodir)
266        host.run('mkdir -p %s' % utils.sh_escape(autodir))
267
268        # make sure there are no files in $AUTODIR/results
269        results_path = os.path.join(autodir, 'results')
270        host.run('rm -rf %s/*' % utils.sh_escape(results_path),
271                 ignore_status=True)
272
273        # Fetch the autotest client from the nearest repository
274        if use_packaging:
275            try:
276                self._install_using_packaging(host, autodir)
277                logging.info("Installation of autotest completed using the "
278                             "packaging system.")
279                return
280            except (error.PackageInstallError, error.AutoservRunError,
281                    global_config.ConfigError), e:
282                logging.info("Could not install autotest using the packaging "
283                             "system: %s. Trying other methods", e)
284        else:
285            # Delete the package checksum file to force dut updating local
286            # packages.
287            command = ('rm -f "%s"' %
288                       (os.path.join(autodir, packages.CHECKSUM_FILE)))
289            host.run(command)
290
291        # try to install from file or directory
292        if self.source_material:
293            c = global_config.global_config
294            supports_autoserv_packaging = c.get_config_value(
295                "PACKAGES", "serve_packages_from_autoserv", type=bool)
296            # Copy autotest recursively
297            if supports_autoserv_packaging and use_autoserv:
298                self._install_using_send_file(host, autodir)
299            else:
300                host.send_file(self.source_material, autodir, delete_dest=True)
301            logging.info("Installation of autotest completed from %s",
302                         self.source_material)
303            self.installed = True
304            return
305
306        # if that fails try to install using svn
307        if utils.run('which svn').exit_status:
308            raise error.AutoservError('svn not found on target machine: %s' %
309                                      host.hostname)
310        try:
311            host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir))
312        except error.AutoservRunError, e:
313            host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir))
314        logging.info("Installation of autotest completed using SVN.")
315        self.installed = True
316
317
318    def uninstall(self, host=None):
319        """
320        Uninstall (i.e. delete) autotest. Removes the autotest client install
321        from the specified host.
322
323        @params host a Host instance from which the client will be removed
324        """
325        if not self.installed:
326            return
327        if not host:
328            host = self.host
329        autodir = host.get_autodir()
330        if not autodir:
331            return
332
333        # perform the actual uninstall
334        host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True)
335        host.set_autodir(None)
336        self.installed = False
337
338
339    def get(self, location=None):
340        if not location:
341            location = os.path.join(self.serverdir, '../client')
342            location = os.path.abspath(location)
343        # If there's stuff run on our client directory already, it
344        # can cause problems. Try giving it a quick clean first.
345        cwd = os.getcwd()
346        os.chdir(location)
347        try:
348            utils.system('tools/make_clean', ignore_status=True)
349        finally:
350            os.chdir(cwd)
351        super(BaseAutotest, self).get(location)
352        self.got = True
353
354
355    def run(self, control_file, results_dir='.', host=None, timeout=None,
356            tag=None, parallel_flag=False, client_disconnect_timeout=None,
357            use_packaging=True):
358        """
359        Run an autotest job on the remote machine.
360
361        @param control_file: An open file-like-obj of the control file.
362        @param results_dir: A str path where the results should be stored
363                on the local filesystem.
364        @param host: A Host instance on which the control file should
365                be run.
366        @param timeout: Maximum number of seconds to wait for the run or None.
367        @param tag: Tag name for the client side instance of autotest.
368        @param parallel_flag: Flag set when multiple jobs are run at the
369                same time.
370        @param client_disconnect_timeout: Seconds to wait for the remote host
371                to come back after a reboot. Defaults to the host setting for
372                DEFAULT_REBOOT_TIMEOUT.
373
374        @raises AutotestRunError: If there is a problem executing
375                the control file.
376        """
377        host = self._get_host_and_setup(host, use_packaging=use_packaging)
378        logging.debug('Autotest job starts on remote host: %s',
379                      host.hostname)
380        results_dir = os.path.abspath(results_dir)
381
382        if client_disconnect_timeout is None:
383            client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
384
385        if tag:
386            results_dir = os.path.join(results_dir, tag)
387
388        atrun = _Run(host, results_dir, tag, parallel_flag)
389        self._do_run(control_file, results_dir, host, atrun, timeout,
390                     client_disconnect_timeout, use_packaging=use_packaging)
391
392
393    def _get_host_and_setup(self, host, use_packaging=True):
394        if not host:
395            host = self.host
396        if not self.installed:
397            self.install(host, use_packaging=use_packaging)
398
399        host.wait_up(timeout=30)
400        return host
401
402
403    def _do_run(self, control_file, results_dir, host, atrun, timeout,
404                client_disconnect_timeout, use_packaging=True):
405        try:
406            atrun.verify_machine()
407        except:
408            logging.error("Verify failed on %s. Reinstalling autotest",
409                          host.hostname)
410            self.install(host)
411            atrun.verify_machine()
412        debug = os.path.join(results_dir, 'debug')
413        try:
414            os.makedirs(debug)
415        except Exception:
416            pass
417
418        delete_file_list = [atrun.remote_control_file,
419                            atrun.remote_control_file + '.state',
420                            atrun.manual_control_file,
421                            atrun.manual_control_file + '.state']
422        cmd = ';'.join('rm -f ' + control for control in delete_file_list)
423        host.run(cmd, ignore_status=True)
424
425        tmppath = utils.get(control_file, local_copy=True)
426
427        # build up the initialization prologue for the control file
428        prologue_lines = []
429
430        # Add the additional user arguments
431        prologue_lines.append("args = %r\n" % self.job.args)
432
433        # If the packaging system is being used, add the repository list.
434        repos = None
435        try:
436            if use_packaging:
437                repos = self.get_fetch_location()
438                prologue_lines.append('job.add_repository(%s)\n' % repos)
439            else:
440                logging.debug('use_packaging is set to False, do not add any '
441                              'repository.')
442        except global_config.ConfigError, e:
443            # If repos is defined packaging is enabled so log the error
444            if repos:
445                logging.error(e)
446
447        # on full-size installs, turn on any profilers the server is using
448        running_profilers = host.job.profilers.add_log.iteritems()
449        for profiler, (args, dargs) in running_profilers:
450            call_args = [repr(profiler)]
451            call_args += [repr(arg) for arg in args]
452            call_args += ["%s=%r" % item for item in dargs.iteritems()]
453            prologue_lines.append("job.profilers.add(%s)\n"
454                                    % ", ".join(call_args))
455        cfile = "".join(prologue_lines)
456
457        cfile += open(tmppath).read()
458        open(tmppath, "w").write(cfile)
459
460        # Create and copy state file to remote_control_file + '.state'
461        state_file = host.job.preprocess_client_state()
462        host.send_file(state_file, atrun.remote_control_file + '.init.state')
463        os.remove(state_file)
464
465        # Copy control_file to remote_control_file on the host
466        host.send_file(tmppath, atrun.remote_control_file)
467        if os.path.abspath(tmppath) != os.path.abspath(control_file):
468            os.remove(tmppath)
469
470        atrun.execute_control(
471                timeout=timeout,
472                client_disconnect_timeout=client_disconnect_timeout)
473
474
475    @classmethod
476    def _check_client_test_result(cls, host, test_name):
477        """
478        Check result of client test.
479        Autotest will store results in the file name status.
480        We check that second to last line in that file begins with 'END GOOD'
481
482        @raises TestFail: If client test does not pass.
483        """
484        client_result_dir = '%s/results/default' % host.autodir
485        command = 'tail -2 %s/status | head -1' % client_result_dir
486        status = host.run(command).stdout.strip()
487        logging.info(status)
488        if status[:8] != 'END GOOD':
489            raise error.TestFail('%s client test did not pass.' % test_name)
490
491
492    def run_timed_test(self, test_name, results_dir='.', host=None,
493                       timeout=None, parallel_flag=False,
494                       client_disconnect_timeout=None, *args, **dargs):
495        """
496        Assemble a tiny little control file to just run one test,
497        and run it as an autotest client-side test
498        """
499        if not host:
500            host = self.host
501        if not self.installed:
502            self.install(host)
503        opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()]
504        cmd = ", ".join([repr(test_name)] + map(repr, args) + opts)
505        control = "job.run_test(%s)\n" % cmd
506        self.run(control, results_dir, host, timeout=timeout,
507                 parallel_flag=parallel_flag,
508                 client_disconnect_timeout=client_disconnect_timeout)
509
510        if dargs.get('check_client_result', False):
511            self._check_client_test_result(host, test_name)
512
513
514    def run_test(self, test_name, results_dir='.', host=None,
515                 parallel_flag=False,
516                 client_disconnect_timeout=None, *args, **dargs):
517        self.run_timed_test(test_name, results_dir, host, timeout=None,
518                            parallel_flag=parallel_flag,
519                            client_disconnect_timeout=client_disconnect_timeout,
520                            *args, **dargs)
521
522
523class _BaseRun(object):
524    """
525    Represents a run of autotest control file.  This class maintains
526    all the state necessary as an autotest control file is executed.
527
528    It is not intended to be used directly, rather control files
529    should be run using the run method in Autotest.
530    """
531    def __init__(self, host, results_dir, tag, parallel_flag):
532        self.host = host
533        self.results_dir = results_dir
534        self.env = host.env
535        self.tag = tag
536        self.parallel_flag = parallel_flag
537        self.autodir = Autotest.get_installed_autodir(self.host)
538        control = os.path.join(self.autodir, 'control')
539        if tag:
540            control += '.' + tag
541        self.manual_control_file = control
542        self.remote_control_file = control + '.autoserv'
543        self.config_file = os.path.join(self.autodir, 'global_config.ini')
544
545
546    def verify_machine(self):
547        binary = os.path.join(self.autodir, 'bin/autotest')
548        try:
549            self.host.run('ls %s > /dev/null 2>&1' % binary)
550        except:
551            raise error.AutoservInstallError(
552                "Autotest does not appear to be installed")
553
554        if not self.parallel_flag:
555            tmpdir = os.path.join(self.autodir, 'tmp')
556            download = os.path.join(self.autodir, 'tests/download')
557            self.host.run('umount %s' % tmpdir, ignore_status=True)
558            self.host.run('umount %s' % download, ignore_status=True)
559
560
561    def get_base_cmd_args(self):
562        args = ['--verbose']
563        if self.tag:
564            args.append('-t %s' % self.tag)
565        if self.host.job.use_external_logging():
566            args.append('-l')
567        if self.host.hostname:
568            args.append('--hostname=%s' % self.host.hostname)
569        args.append('--user=%s' % self.host.job.user)
570
571        args.append(self.remote_control_file)
572        return args
573
574
575    def get_daemon_cmd(self, monitor_dir):
576        cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'),
577               monitor_dir, '-H autoserv']
578        cmd += self.get_base_cmd_args()
579        cmd += ['>/dev/null', '2>/dev/null', '&']
580        return ' '.join(cmd)
581
582
583    def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read):
584        cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'),
585               monitor_dir, str(stdout_read), str(stderr_read)]
586        return ' '.join(cmd)
587
588
589    def get_client_log(self):
590        """Find what the "next" client.* prefix should be
591
592        @returns A string of the form client.INTEGER that should be prefixed
593            to all client debug log files.
594        """
595        max_digit = -1
596        debug_dir = os.path.join(self.results_dir, 'debug')
597        client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*'))
598        for log in client_logs:
599            _, number, _ = log.split('.', 2)
600            if number.isdigit():
601                max_digit = max(max_digit, int(number))
602        return 'client.%d' % (max_digit + 1)
603
604
605    def copy_client_config_file(self, client_log_prefix=None):
606        """
607        Create and copy the client config file based on the server config.
608
609        @param client_log_prefix: Optional prefix to prepend to log files.
610        """
611        client_config_file = self._create_client_config_file(client_log_prefix)
612        self.host.send_file(client_config_file, self.config_file)
613        os.remove(client_config_file)
614
615
616    def _create_client_config_file(self, client_log_prefix=None):
617        """
618        Create a temporary file with the [CLIENT] section configuration values
619        taken from the server global_config.ini.
620
621        @param client_log_prefix: Optional prefix to prepend to log files.
622
623        @return: Path of the temporary file generated.
624        """
625        config = global_config.global_config.get_section_values('CLIENT')
626        if client_log_prefix:
627            config.set('CLIENT', 'default_logging_name', client_log_prefix)
628        return self._create_aux_file(config.write)
629
630
631    def _create_aux_file(self, func, *args):
632        """
633        Creates a temporary file and writes content to it according to a
634        content creation function. The file object is appended to *args, which
635        is then passed to the content creation function
636
637        @param func: Function that will be used to write content to the
638                temporary file.
639        @param *args: List of parameters that func takes.
640        @return: Path to the temporary file that was created.
641        """
642        fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir)
643        aux_file = os.fdopen(fd, "w")
644        try:
645            list_args = list(args)
646            list_args.append(aux_file)
647            func(*list_args)
648        finally:
649            aux_file.close()
650        return path
651
652
653    @staticmethod
654    def is_client_job_finished(last_line):
655        return bool(re.match(r'^END .*\t----\t----\t.*$', last_line))
656
657
658    @staticmethod
659    def is_client_job_rebooting(last_line):
660        return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line))
661
662
663    def _diagnose_dut(self, old_boot_id=None):
664        """
665        Run diagnostic checks on a DUT.
666
667        1. ping: A dead host will not respond to pings.
668        2. ssh (happens with 3.): DUT hangs usually fail in authentication
669            but respond to pings.
670        3. Check if a reboot occured: A healthy but unexpected reboot leaves the
671            host running with a new boot id.
672
673        This method will always raise an exception from the AutotestFailure
674        family and should only get called when the reason for a test failing
675        is ambiguous.
676
677        @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping.
678        @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT.
679        @raises AutotestDeviceRebooted: If the boot id changed.
680        @raises AutotestAbort: If none of the above exceptions were raised.
681            Since we have no recourse we must abort at this stage.
682        """
683        msg = 'Autotest client terminated unexpectedly: '
684        if utils.ping(self.host.hostname, tries=1, deadline=1) != 0:
685            msg += 'DUT is no longer pingable, it may have rebooted or hung.\n'
686            raise AutotestDeviceNotPingable(msg)
687
688        if old_boot_id:
689            try:
690                new_boot_id = self.host.get_boot_id(timeout=60)
691            except Exception as e:
692                msg += ('DUT is pingable but not SSHable, it most likely'
693                        ' sporadically rebooted during testing. %s\n' % str(e))
694                raise AutotestDeviceNotSSHable(msg)
695            else:
696                if new_boot_id != old_boot_id:
697                    msg += 'DUT rebooted during the test run.\n'
698                    raise AutotestDeviceRebooted(msg)
699
700            msg += ('DUT is pingable, SSHable and did NOT restart '
701                    'un-expectedly. We probably lost connectivity during the '
702                    'test.')
703        else:
704            msg += ('DUT is pingable, could not determine if an un-expected '
705                    'reboot occured during the test.')
706
707        raise AutotestAbort(msg)
708
709
710    def log_unexpected_abort(self, stderr_redirector, old_boot_id=None):
711        """
712        Logs that something unexpected happened, then tries to diagnose the
713        failure. The purpose of this function is only to close out the status
714        log with the appropriate error message, not to critically terminate
715        the program.
716
717        @param stderr_redirector: log stream.
718        @param old_boot_id: boot id used to infer if a reboot occured.
719        """
720        stderr_redirector.flush_all_buffers()
721        try:
722            self._diagnose_dut(old_boot_id)
723        except AutotestFailure as e:
724            self.host.job.record('END ABORT', None, None, str(e))
725
726
727    @staticmethod
728    def _strip_stderr_prologue(stderr):
729        """Strips the 'standard' prologue that get pre-pended to every
730        remote command and returns the text that was actually written to
731        stderr by the remote command."""
732        stderr_lines = stderr.split("\n")[1:]
733        if not stderr_lines:
734            return ""
735        elif stderr_lines[0].startswith("NOTE: autotestd_monitor"):
736            del stderr_lines[0]
737        return "\n".join(stderr_lines)
738
739
740    def _execute_daemon(self, timeout, stderr_redirector,
741                        client_disconnect_timeout):
742        monitor_dir = self.host.get_tmp_dir()
743        daemon_cmd = self.get_daemon_cmd(monitor_dir)
744
745        # grab the location for the server-side client log file
746        client_log_prefix = self.get_client_log()
747        client_log_path = os.path.join(self.results_dir, 'debug',
748                                       client_log_prefix + '.log')
749        client_log = open(client_log_path, 'w', 0)
750        self.copy_client_config_file(client_log_prefix)
751
752        stdout_read = stderr_read = 0
753        self.host.job.push_execution_context(self.results_dir)
754        try:
755            self.host.run(daemon_cmd, ignore_status=True, timeout=timeout)
756            disconnect_warnings = []
757            while True:
758                monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read,
759                                                   stderr_read)
760                try:
761                    result = self.host.run(monitor_cmd, ignore_status=True,
762                                           timeout=timeout,
763                                           stdout_tee=client_log,
764                                           stderr_tee=stderr_redirector)
765                except error.AutoservRunError, e:
766                    result = e.result_obj
767                    result.exit_status = None
768                    disconnect_warnings.append(e.description)
769
770                    stderr_redirector.log_warning(
771                        "Autotest client was disconnected: %s" % e.description,
772                        "NETWORK")
773                except error.AutoservSSHTimeout:
774                    result = utils.CmdResult(monitor_cmd, "", "", None, 0)
775                    stderr_redirector.log_warning(
776                        "Attempt to connect to Autotest client timed out",
777                        "NETWORK")
778
779                stdout_read += len(result.stdout)
780                stderr_read += len(self._strip_stderr_prologue(result.stderr))
781
782                if result.exit_status is not None:
783                    # TODO (crosbug.com/38224)- sbasi: Remove extra logging.
784                    logging.debug('Result exit status is %d.',
785                                  result.exit_status)
786                    return result
787                elif not self.host.wait_up(client_disconnect_timeout):
788                    raise error.AutoservSSHTimeout(
789                        "client was disconnected, reconnect timed out")
790        finally:
791            client_log.close()
792            self.host.job.pop_execution_context()
793
794
795    def _really_execute_control(self, timeout, stderr_redirector,
796                                client_disconnect_timeout):
797        logging.info("Executing %s/bin/autotest %s/controt",
798                     self.autodir, self.autodir)
799
800        result = self._execute_daemon(timeout, stderr_redirector,
801                                        client_disconnect_timeout)
802
803        last_line = stderr_redirector.last_line
804
805        # check if we failed hard enough to warrant an exception
806        if result.exit_status == 1:
807            err = error.AutotestRunError("client job was aborted")
808        elif not result.stderr:
809            err = error.AutotestRunError(
810                "_really_execute_control failed to return anything\n"
811                "stdout:%s\n" % result.stdout)
812        else:
813            err = None
814
815        # log something if the client failed AND never finished logging
816        if err and not self.is_client_job_finished(last_line):
817            self.log_unexpected_abort(stderr_redirector)
818
819        if err:
820            raise err
821        else:
822            return stderr_redirector.last_line
823
824
825    def execute_control(self, timeout=None, client_disconnect_timeout=None):
826        collector = log_collector(self.host, self.tag, self.results_dir)
827        hostname = self.host.hostname
828        remote_results = collector.client_results_dir
829        local_results = collector.server_results_dir
830        self.host.job.add_client_log(hostname, remote_results,
831                                        local_results)
832        job_record_context = self.host.job.get_record_context()
833        logger = client_logger(self.host, self.tag, self.results_dir)
834
835        try:
836            boot_id = self.host.get_boot_id()
837            last = self._really_execute_control(timeout, logger,
838                                                client_disconnect_timeout)
839            if self.is_client_job_finished(last):
840                logging.info("Client complete")
841                return
842            elif self.is_client_job_rebooting(last):
843                # TODO(crbug.com/684311) This feature is never used. Validate
844                # and drop this case.
845                m = 'chromeos/autotest/errors/client_test_triggered_reboot'
846                metrics.Counter(m).increment()
847                self.host.job.record("ABORT", None, "reboot",
848                                     'client triggered reboot is unsupported')
849                self.host.job.record("END ABORT", None, None,
850                                     'client triggered reboot is unsupported')
851                return
852
853            # If a test fails without probable cause we try to bucket it's
854            # failure into one of 2 categories. If we can determine the
855            # current state of the device and it is suspicious, we close the
856            # status lines indicating a failure. If we either cannot
857            # determine the state of the device, or it appears totally
858            # healthy, we give up and abort.
859            try:
860                self._diagnose_dut(boot_id)
861            except AutotestDeviceError as e:
862                # The status lines of the test are pretty much tailed to
863                # our log, with indentation, from the client job on the DUT.
864                # So if the DUT goes down unexpectedly we'll end up with a
865                # malformed status log unless we manually unwind the status
866                # stack. Ideally we would want to write a nice wrapper like
867                # server_job methods run_reboot, run_group but they expect
868                # reboots and we don't.
869                self.host.job.record('FAIL', None, None, str(e))
870                self.host.job.record('END FAIL', None, None)
871                self.host.job.record('END GOOD', None, None)
872                self.host.job.failed_with_device_error = True
873                return
874            except AutotestAbort as e:
875                self.host.job.record('ABORT', None, None, str(e))
876                self.host.job.record('END ABORT', None, None)
877
878                # give the client machine a chance to recover from a crash
879                self.host.wait_up(
880                    self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
881                logging.debug('Unexpected final status message from '
882                                'client %s: %s', self.host.hostname, last)
883                # The line 'last' may have sensitive phrases, like
884                # 'END GOOD', which breaks the tko parser. So the error
885                # message will exclude it, since it will be recorded to
886                # status.log.
887                msg = ("Aborting - unexpected final status message from "
888                        "client on %s\n") % self.host.hostname
889                raise error.AutotestRunError(msg)
890        finally:
891            logging.debug('Autotest job finishes running. Below is the '
892                          'post-processing operations.')
893            logger.close()
894            collector.collect_client_job_results()
895            collector.remove_redundant_client_logs()
896            state_file = os.path.basename(self.remote_control_file
897                                            + '.state')
898            state_path = os.path.join(self.results_dir, state_file)
899            self.host.job.postprocess_client_state(state_path)
900            self.host.job.remove_client_log(hostname, remote_results,
901                                            local_results)
902            job_record_context.restore()
903
904            logging.debug('Autotest job finishes.')
905
906        # should only get here if we timed out
907        assert timeout
908        raise error.AutotestTimeoutError()
909
910
911class log_collector(object):
912    def __init__(self, host, client_tag, results_dir):
913        self.host = host
914        if not client_tag:
915            client_tag = "default"
916        self.client_results_dir = os.path.join(host.get_autodir(), "results",
917                                               client_tag)
918        self.server_results_dir = results_dir
919
920
921    def collect_client_job_results(self):
922        """ A method that collects all the current results of a running
923        client job into the results dir. By default does nothing as no
924        client job is running, but when running a client job you can override
925        this with something that will actually do something. """
926        # make an effort to wait for the machine to come up
927        try:
928            self.host.wait_up(timeout=30)
929        except error.AutoservError:
930            # don't worry about any errors, we'll try and
931            # get the results anyway
932            pass
933
934        # Copy all dirs in default to results_dir
935        try:
936            # Build test result directory summary
937            result_tools_runner.run_on_client(
938                    self.host, self.client_results_dir,
939                    ENABLE_RESULT_THROTTLING)
940
941            with metrics.SecondsTimer(
942                    'chromeos/autotest/job/log_collection_duration',
943                    fields={'dut_host_name': self.host.hostname}):
944                self.host.get_file(
945                        self.client_results_dir + '/',
946                        self.server_results_dir,
947                        preserve_symlinks=True)
948        except Exception:
949            # well, don't stop running just because we couldn't get logs
950            e_msg = "Unexpected error copying test result logs, continuing ..."
951            logging.error(e_msg)
952            traceback.print_exc(file=sys.stdout)
953
954
955    def remove_redundant_client_logs(self):
956        """Remove client.*.log files in favour of client.*.DEBUG files."""
957        debug_dir = os.path.join(self.server_results_dir, 'debug')
958        debug_files = [f for f in os.listdir(debug_dir)
959                       if re.search(r'^client\.\d+\.DEBUG$', f)]
960        for debug_file in debug_files:
961            log_file = debug_file.replace('DEBUG', 'log')
962            log_file = os.path.join(debug_dir, log_file)
963            if os.path.exists(log_file):
964                os.remove(log_file)
965
966
967# a file-like object for catching stderr from an autotest client and
968# extracting status logs from it
969class BaseClientLogger(object):
970    """Partial file object to write to both stdout and
971    the status log file.  We only implement those methods
972    utils.run() actually calls.
973    """
974    status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$")
975    test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$")
976    fetch_package_parser = re.compile(
977        r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$")
978    extract_indent = re.compile(r"^(\t*).*$")
979    extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$")
980
981    def __init__(self, host, tag, server_results_dir):
982        self.host = host
983        self.job = host.job
984        self.log_collector = log_collector(host, tag, server_results_dir)
985        self.leftover = ""
986        self.last_line = ""
987        self.logs = {}
988
989
990    def _process_log_dict(self, log_dict):
991        log_list = log_dict.pop("logs", [])
992        for key in sorted(log_dict.iterkeys()):
993            log_list += self._process_log_dict(log_dict.pop(key))
994        return log_list
995
996
997    def _process_logs(self):
998        """Go through the accumulated logs in self.log and print them
999        out to stdout and the status log. Note that this processes
1000        logs in an ordering where:
1001
1002        1) logs to different tags are never interleaved
1003        2) logs to x.y come before logs to x.y.z for all z
1004        3) logs to x.y come before x.z whenever y < z
1005
1006        Note that this will in general not be the same as the
1007        chronological ordering of the logs. However, if a chronological
1008        ordering is desired that one can be reconstructed from the
1009        status log by looking at timestamp lines."""
1010        log_list = self._process_log_dict(self.logs)
1011        for entry in log_list:
1012            self.job.record_entry(entry, log_in_subdir=False)
1013        if log_list:
1014            self.last_line = log_list[-1].render()
1015
1016
1017    def _process_quoted_line(self, tag, line):
1018        """Process a line quoted with an AUTOTEST_STATUS flag. If the
1019        tag is blank then we want to push out all the data we've been
1020        building up in self.logs, and then the newest line. If the
1021        tag is not blank, then push the line into the logs for handling
1022        later."""
1023        entry = base_job.status_log_entry.parse(line)
1024        if entry is None:
1025            return  # the line contains no status lines
1026        if tag == "":
1027            self._process_logs()
1028            self.job.record_entry(entry, log_in_subdir=False)
1029            self.last_line = line
1030        else:
1031            tag_parts = [int(x) for x in tag.split(".")]
1032            log_dict = self.logs
1033            for part in tag_parts:
1034                log_dict = log_dict.setdefault(part, {})
1035            log_list = log_dict.setdefault("logs", [])
1036            log_list.append(entry)
1037
1038
1039    def _process_info_line(self, line):
1040        """Check if line is an INFO line, and if it is, interpret any control
1041        messages (e.g. enabling/disabling warnings) that it may contain."""
1042        match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line)
1043        if not match:
1044            return   # not an INFO line
1045        for field in match.group(1).split('\t'):
1046            if field.startswith("warnings.enable="):
1047                func = self.job.warning_manager.enable_warnings
1048            elif field.startswith("warnings.disable="):
1049                func = self.job.warning_manager.disable_warnings
1050            else:
1051                continue
1052            warning_type = field.split("=", 1)[1]
1053            func(warning_type)
1054
1055
1056    def _process_line(self, line):
1057        """Write out a line of data to the appropriate stream. Status
1058        lines sent by autotest will be prepended with
1059        "AUTOTEST_STATUS", and all other lines are ssh error
1060        messages."""
1061        status_match = self.status_parser.search(line)
1062        test_complete_match = self.test_complete_parser.search(line)
1063        fetch_package_match = self.fetch_package_parser.search(line)
1064        if status_match:
1065            tag, line = status_match.groups()
1066            self._process_info_line(line)
1067            self._process_quoted_line(tag, line)
1068        elif test_complete_match:
1069            self._process_logs()
1070            fifo_path, = test_complete_match.groups()
1071            try:
1072                self.log_collector.collect_client_job_results()
1073                self.host.run("echo A > %s" % fifo_path)
1074            except Exception:
1075                msg = "Post-test log collection failed, continuing anyway"
1076                logging.exception(msg)
1077        elif fetch_package_match:
1078            pkg_name, dest_path, fifo_path = fetch_package_match.groups()
1079            serve_packages = global_config.global_config.get_config_value(
1080                "PACKAGES", "serve_packages_from_autoserv", type=bool)
1081            if serve_packages and pkg_name.endswith(".tar.bz2"):
1082                try:
1083                    self._send_tarball(pkg_name, dest_path)
1084                except Exception:
1085                    msg = "Package tarball creation failed, continuing anyway"
1086                    logging.exception(msg)
1087            try:
1088                self.host.run("echo B > %s" % fifo_path)
1089            except Exception:
1090                msg = "Package tarball installation failed, continuing anyway"
1091                logging.exception(msg)
1092        else:
1093            logging.info(line)
1094
1095
1096    def _send_tarball(self, pkg_name, remote_dest):
1097        name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name)
1098        src_dirs = []
1099        if pkg_type == 'test':
1100            for test_dir in ['site_tests', 'tests']:
1101                src_dir = os.path.join(self.job.clientdir, test_dir, name)
1102                if os.path.exists(src_dir):
1103                    src_dirs += [src_dir]
1104                    if AUTOSERV_PREBUILD:
1105                        prebuild.setup(self.job.clientdir, src_dir)
1106                    break
1107        elif pkg_type == 'profiler':
1108            src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)]
1109            if AUTOSERV_PREBUILD:
1110                prebuild.setup(self.job.clientdir, src_dir)
1111        elif pkg_type == 'dep':
1112            src_dirs += [os.path.join(self.job.clientdir, 'deps', name)]
1113        elif pkg_type == 'client':
1114            return  # you must already have a client to hit this anyway
1115        else:
1116            return  # no other types are supported
1117
1118        # iterate over src_dirs until we find one that exists, then tar it
1119        for src_dir in src_dirs:
1120            if os.path.exists(src_dir):
1121                try:
1122                    logging.info('Bundling %s into %s', src_dir, pkg_name)
1123                    temp_dir = autotemp.tempdir(unique_id='autoserv-packager',
1124                                                dir=self.job.tmpdir)
1125                    tarball_path = self.job.pkgmgr.tar_package(
1126                        pkg_name, src_dir, temp_dir.name, " .")
1127                    self.host.send_file(tarball_path, remote_dest)
1128                finally:
1129                    temp_dir.clean()
1130                return
1131
1132
1133    def log_warning(self, msg, warning_type):
1134        """Injects a WARN message into the current status logging stream."""
1135        timestamp = int(time.time())
1136        if self.job.warning_manager.is_valid(timestamp, warning_type):
1137            self.job.record('WARN', None, None, msg)
1138
1139
1140    def write(self, data):
1141        # now start processing the existing buffer and the new data
1142        data = self.leftover + data
1143        lines = data.split('\n')
1144        processed_lines = 0
1145        try:
1146            # process all the buffered data except the last line
1147            # ignore the last line since we may not have all of it yet
1148            for line in lines[:-1]:
1149                self._process_line(line)
1150                processed_lines += 1
1151        finally:
1152            # save any unprocessed lines for future processing
1153            self.leftover = '\n'.join(lines[processed_lines:])
1154
1155
1156    def flush(self):
1157        sys.stdout.flush()
1158
1159
1160    def flush_all_buffers(self):
1161        if self.leftover:
1162            self._process_line(self.leftover)
1163            self.leftover = ""
1164        self._process_logs()
1165        self.flush()
1166
1167
1168    def close(self):
1169        self.flush_all_buffers()
1170
1171
1172SiteAutotest = client_utils.import_site_class(
1173    __file__, "autotest_lib.server.site_autotest", "SiteAutotest",
1174    BaseAutotest)
1175
1176
1177_SiteRun = client_utils.import_site_class(
1178    __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun)
1179
1180
1181SiteClientLogger = client_utils.import_site_class(
1182    __file__, "autotest_lib.server.site_autotest", "SiteClientLogger",
1183    BaseClientLogger)
1184
1185
1186class Autotest(SiteAutotest):
1187    pass
1188
1189
1190class client_logger(SiteClientLogger):
1191    pass
1192
1193
1194class _Run(_SiteRun):
1195    pass
1196