• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import os
7import re
8import shutil
9from autotest_lib.client.common_lib import utils as client_utils
10from autotest_lib.client.common_lib.cros import dev_server
11from autotest_lib.client.common_lib.cros import retry
12from autotest_lib.client.cros import constants
13from autotest_lib.server.cros.dynamic_suite.constants import JOB_BUILD_KEY
14from autotest_lib.server.crashcollect import collect_log_file
15from autotest_lib.server import utils
16
17try:
18    from chromite.lib import metrics
19except ImportError:
20    metrics = client_utils.metrics_mock
21
22
def generate_minidump_stacktrace(minidump_path):
    """
    Symbolicates the specified minidump locally with minidump_stackwalk.

    The symbol directory is derived from the autotest server directory
    ('<server_dir>/../../../lib/debug'), so the debug symbols are expected
    to reside under:
        /build/<board>/usr/lib/debug

    The command redirects its output to '<minidump_path>.txt'.

    @param minidump_path: absolute path to minidump to be symbolicated.
    @raise client_utils.error.CmdError if minidump_stackwalk return code != 0.
    """
    server_dir = utils.get_server_dir()
    symbol_dir = '%s/../../../lib/debug' % server_dir
    logging.info('symbol_dir: %s', symbol_dir)

    stackwalk_cmd = 'minidump_stackwalk "%s" "%s" > "%s.txt"' % (
            minidump_path, symbol_dir, minidump_path)
    client_utils.run(stackwalk_cmd)
37
38
def _resolve_crashserver():
    """
    Picks the least loaded crashserver to symbolicate a crashdump with.

    A metrics counter is incremented for either outcome so that resolution
    failures can be tracked.

    @raises DevServerException if no server with capacity could be found.
    @returns Hostname of resolved server, if found.
    """
    least_loaded = dev_server.get_least_loaded_devserver(
            devserver_type=dev_server.CrashServer)

    if least_loaded:
        resolved = metrics.Counter('chromeos/autotest/crashcollect/resolved')
        resolved.increment(fields={'crash_server': least_loaded})
        return least_loaded

    unresolved = metrics.Counter(
            'chromeos/autotest/crashcollect/could_not_resolve')
    unresolved.increment()
    raise dev_server.DevServerException(
            'No crash server has the capacity to symbolicate the dump.')
58
59
def _symbolicate_minidump_with_devserver(minidump_path, resultdir,
                                         crashserver_name):
    """
    Asks a devserver to symbolicate the specified minidump.

    The build under test is read from the job keyvals in |resultdir|; the
    debug symbols for that build are assumed to be staged on the devserver.
    The returned trace is written to '<minidump_path>.txt'.

    @param minidump_path: absolute path to minidump to be symbolicated.
    @param resultdir: server job's result directory.
    @param crashserver_name: Name of crashserver to attempt to symbolicate with.
    @raise DevServerException upon failure, HTTP or otherwise.
    """
    # Without the build we cannot pick the right debug symbols, so there is
    # no point in contacting the devserver at all.
    job_keyvals = client_utils.read_keyval(resultdir)
    if JOB_BUILD_KEY not in job_keyvals:
        raise dev_server.DevServerException(
            'Cannot determine build being tested.')
    build = job_keyvals[JOB_BUILD_KEY]

    crashserver = dev_server.CrashServer(crashserver_name)
    timer_fields = {'crash_server': crashserver_name}
    with metrics.SecondsTimer(
            'chromeos/autotest/crashcollect/symbolicate_duration',
            fields=timer_fields):
        trace_text = crashserver.symbolicate_dump(minidump_path, build)

    # An empty response is treated as a failure.
    if not trace_text:
        raise dev_server.DevServerException('Unknown error!!')

    trace_path = minidump_path + '.txt'
    with open(trace_path, 'w') as trace_file:
        trace_file.write(trace_text)
91
def generate_stacktrace_for_file(minidump, host_resultdir):
    """
    Tries to generate a stack trace for the file located at |minidump|.

    Local symbolication is attempted first; if that command fails, a
    crashserver is asked instead, with a 600 second timeout.  Failures are
    only logged, never raised to the caller.

    @param minidump: path to minidump file to generate the stacktrace for.
    @param host_resultdir: server job's result directory.
    """
    # Attempt 1: symbolicate on this machine.
    try:
        logging.info('Trying to generate stack trace locally for %s', minidump)
        generate_minidump_stacktrace(minidump)
    except client_utils.error.CmdError as err:
        logging.info('Failed to generate stack trace locally for '
                     'dump %s (rc=%d):\n%r',
                     minidump, err.result_obj.exit_status, err)
    else:
        logging.info('Generated stack trace for dump %s', minidump)
        return

    # Attempt 2: hand the dump to a crashserver.
    try:
        logging.info('Generating stack trace using devserver for %s', minidump)
        crashserver_name = _resolve_crashserver()
        timed_out, _ = retry.timeout(
                _symbolicate_minidump_with_devserver,
                args=(minidump, host_resultdir, crashserver_name),
                timeout_sec=600)
        if not timed_out:
            logging.info('Generated stack trace for dump %s', minidump)
            return
        logging.info('Generating stack trace timed out for dump %s',
                     minidump)
        timeout_counter = metrics.Counter(
                'chromeos/autotest/crashcollect/symbolicate_timed_out')
        timeout_counter.increment(fields={'crash_server': crashserver_name})
    except dev_server.DevServerException as e:
        logging.info('Failed to generate stack trace on devserver for dump '
                     '%s:\n%r', minidump, e)

    # Neither attempt produced a stack trace.
    logging.warning('Failed to generate stack trace for %s (see info logs)',
                    minidump)
133
def find_and_generate_minidump_stacktraces(host_resultdir):
    """
    Symbolicates every minidump found under the test results directory.

    The results directory is walked recursively; every file with a .dmp
    extension is treated as a minidump.  The stack trace for a dump is
    written next to it, named after the dump with .txt appended.

    @param host_resultdir: Directory to walk looking for dmp files.

    @returns The list of all found minidump files. Each dump may or may not have
             been symbolized.
    """
    found_dumps = []
    for dump_path in _find_crashdumps(host_resultdir):
        generate_stacktrace_for_file(dump_path, host_resultdir)
        found_dumps.append(dump_path)
    return found_dumps
153
154
def _find_crashdumps(host_resultdir):
    """Yield the path of every .dmp file below a directory.

    The directory tree is walked recursively with os.walk.

    @param host_resultdir The result directory for this host for this test run.
    """
    for dirpath, _, filenames in os.walk(host_resultdir):
        for filename in filenames:
            if not filename.endswith('.dmp'):
                continue
            yield os.path.join(dirpath, filename)
164
165
def _find_orphaned_crashdumps(host):
    """Return file paths of crashdumps on host.

    Every entry directly inside the crash directory (constants.CRASH_DIR)
    is listed.

    @param host A host object of the device.
    """
    crash_glob = os.path.join(constants.CRASH_DIR, '*')
    return host.list_files_glob(crash_glob)
172
173
def report_crashdumps(host):
    """Report on crashdumps for host.

    This is run when no tests failed.  We don't process crashdumps in this
    case because of devserver load, but they should still be reported.

    Crashdumps still on the device are reported first, then the ones
    already present under the job's result directory.  Each is logged as a
    warning and recorded as an INFO entry on the job.

    @param host A host object of the device we're to pull crashes from.
    """
    for remote_dump in _find_orphaned_crashdumps(host):
        logging.warning('Host crashdump exists: %s', remote_dump)
        remote_msg = 'Host crashdump exists: %s' % (remote_dump,)
        host.job.record('INFO', None, None, remote_msg)

    resultdir = _get_host_resultdir(host)
    for local_dump in _find_crashdumps(resultdir):
        logging.warning('Local crashdump exists: %s', local_dump)
        local_msg = 'Local crashdump exists: %s' % (local_dump,)
        host.job.record('INFO', None, None, local_msg)
192
193
def fetch_orphaned_crashdumps(host, infodir):
    """
    Copy all of the crashes in the crash directory over to the results folder.

    Collection is best effort: any failure while collecting is logged and
    the crashdumps collected so far are returned.  If nothing was collected,
    |infodir| is removed again when it is empty.

    @param host A host object of the device we're to pull crashes from.
    @param infodir The directory to fetch crashdumps into.
    @return The list of minidumps that we pulled back from the host.
    """
    if not os.path.exists(infodir):
        os.mkdir(infodir)
    orphans = []
    try:
        for crashfile in _find_orphaned_crashdumps(host):
            logging.info('Collecting %s...', crashfile)
            collect_log_file(host, crashfile, infodir, clean=True)
            orphans.append(crashfile)
    except Exception as e:
        logging.warning('Collection of orphaned crash dumps failed %s', e)
    finally:
        # Delete infodir if we have no orphans
        if not orphans:
            logging.info('There are no orphaned crashes; deleting %s', infodir)
            try:
                os.rmdir(infodir)
            except OSError as e:
                # os.rmdir only removes empty directories.  The directory
                # may hold content from before this call or from a
                # collection that failed part way; leaving it in place is
                # harmless, whereas raising here would defeat the
                # best-effort handling above.
                logging.warning('Could not delete %s: %s', infodir, e)
    return orphans
218
219
def _copy_to_debug_dir(host_resultdir, filename):
    """
    Copies a file into the 'debug' directory under host_resultdir.

    The copy keeps the base name of the source file.  An IOError during the
    copy is logged as a warning and otherwise ignored.

    @param host_resultdir The result directory for this host for this test run.
    @param filename The full path of the file to copy to the debug folder.
    """
    target = os.path.join(host_resultdir, 'debug', os.path.basename(filename))
    try:
        shutil.copyfile(filename, target)
    except IOError:
        logging.warning('Failed to copy %s to %s', filename, target)
    else:
        logging.info('Copied %s to %s', filename, target)
236
237
def _get_host_resultdir(host):
    """Get resultdir for host.

    @param host A host object of the device we're to pull crashes from.
    @return The 'resultdir' attribute of the host's job, or None when the
            host has no 'job' attribute or the job has no 'resultdir'.
    """
    job = getattr(host, 'job', None)
    return getattr(job, 'resultdir', None)
244
245
def get_host_infodir(host):
    """Get infodir for host.

    The infodir is the 'crashinfo.<hostname>' directory inside the host's
    result directory.

    @param host A host object of the device we're to pull crashes from.
    """
    resultdir = _get_host_resultdir(host)
    infodir_name = 'crashinfo.%s' % host.hostname
    return os.path.join(resultdir, infodir_name)
253
254
def get_site_crashdumps(host, test_start_time):
    """
    Copy all of the crashdumps from a host to the results directory.

    Orphaned crashdumps are fetched from the host, stack traces are
    generated for every minidump under the result directory, the dumps are
    recorded in the job's status log, and the stack trace and log of each
    minidump are copied to the debug directory.

    @param host The host object from which to pull crashes
    @param test_start_time When the test we just ran started.  Not used by
                           this function.
    @return A list of all the minidumps: the orphaned crashdumps followed by
            the minidumps found under the result directory.
    """
    host_resultdir = _get_host_resultdir(host)
    infodir = get_host_infodir(host)

    orphans = fetch_orphaned_crashdumps(host, infodir)
    minidumps = find_and_generate_minidump_stacktraces(host_resultdir)

    # Record all crashdumps in status.log of the job:
    # - If one server job runs several client jobs we will only record
    # crashdumps in the status.log of the high level server job.
    # - We will record these crashdumps whether or not we successfully
    # symbolicate them.
    # The parentheses matter: 'and' binds tighter than 'or', so without
    # them host.job.record() would be reached with a falsy host.job whenever
    # there are orphans.
    if host.job and (minidumps or orphans):
        host.job.record('INFO', None, None, 'Start crashcollection record')
        for minidump in minidumps:
            host.job.record('INFO', None, 'New Crash Dump', minidump)
        for orphan in orphans:
            host.job.record('INFO', None, 'Orphaned Crash Dump', orphan)
        host.job.record('INFO', None, None, 'End crashcollection record')

    orphans.extend(minidumps)

    for minidump in orphans:
        report_bug_from_crash(host, minidump)

    # We copy Chrome crash information to the debug dir to assist debugging.
    # Since orphans occurred on a previous run, they are most likely not
    # relevant to the current failure, so we don't copy them.
    for minidump in minidumps:
        minidump_no_ext = os.path.splitext(minidump)[0]
        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.dmp.txt')
        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.log')

    return orphans
296
297
def find_package_of(host, exec_name):
    """
    Find the package that an executable came from.

    @param host A host object that has the executable.
    @param exec_name Name of or path to executable.
    @return The name of the package that installed the executable, or an
            empty string if zero or more than one candidate package is found.
    """
    # exec_name is shell-quoted before being spliced into the command line
    # so that whitespace or shell metacharacters in it cannot alter the
    # command.  Names made only of safe characters are left unchanged.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # Run "portageq owners" on "host" to determine which package owns
    # "exec_name."  Portageq output consists of package names followed by
    # tab-prefixed path names.  For example, owners of "python:"
    #
    # sys-devel/gdb-7.7.1-r2
    #         /usr/share/gdb/python
    # chromeos-base/dev-install-0.0.1-r711
    #         /usr/bin/python
    # dev-lang/python-2.7.3-r7
    #         /etc/env.d/python
    #
    # This gets piped into "xargs stat" to annotate each line with
    # information about the path, so we later can consider only packages
    # with executable files.  After annotation the above looks like:
    #
    # stat: cannot stat '@@@ sys-devel/gdb-7.7.1-r2 @@@': ...
    # stat: cannot stat '/usr/share/gdb/python': ...
    # stat: cannot stat '@@@ chromeos-base/dev-install-0.0.1-r711 @@@': ...
    # 755 -rwxr-xr-x /usr/bin/python
    # stat: cannot stat '@@@ dev-lang/python-2.7.3-r7 @@@': ...
    # 755 drwxr-xr-x /etc/env.d/python
    #
    # Package names are surrounded by "@@@" to facilitate parsing.  Lines
    # starting with an octal number were successfully annotated, because
    # the path existed on "host."
    # The above is then parsed to find packages which contain executable files
    # (not directories), in this case "chromeos-base/dev-install-0.0.1-r711."
    #
    # TODO(milleral): portageq can show scary looking error messages
    # in the debug logs via stderr. We only look at stdout, so those
    # get filtered, but it would be good to silence them.
    cmd = ('portageq owners / ' + quote(exec_name) +
            r'| sed -e "s/^[^\t].*/@@@ & @@@/" -e "s/^\t//"'
            r'| tr \\n \\0'
            ' | xargs -0 -r stat -L -c "%a %A %n" 2>&1')
    portageq = host.run(cmd, ignore_status=True)

    # Parse into a set of names of packages containing an executable file.
    packages = set()
    pkg = ''
    pkg_re = re.compile(r'@@@ (.*) @@@')
    path_re = re.compile(r'^([0-7]{3,}) (.)')
    for line in portageq.stdout.splitlines():
        match = pkg_re.search(line)
        if match:
            # A package header: the paths that follow belong to it.
            pkg = match.group(1)
            continue
        match = path_re.match(line)
        if match:
            # Group 1 is the octal mode, group 2 the file type character
            # of the symbolic mode ('-' denotes a regular file).
            isexec = int(match.group(1), 8) & 0o111
            isfile = match.group(2) == '-'
            if pkg and isexec and isfile:
                packages.add(pkg)

    # If exactly one package found it must be the one we want, return it.
    if len(packages) == 1:
        return packages.pop()

    # TODO(milleral): Decide if it really is an error if not exactly one
    # package is found.
    # It is highly questionable as to if this should be left in the
    # production version of this code or not.
    if not packages:
        logging.warning('find_package_of() found no packages for "%s"',
                        exec_name)
    else:
        # Sorted so the log message is stable from run to run.
        logging.warning('find_package_of() found multiple packages for "%s": '
                        '%s', exec_name, ', '.join(sorted(packages)))
    return ''
375
376
def report_bug_from_crash(host, minidump_path):
    """
    Given a host to query and a minidump, file a bug about the crash.

    The crashing executable is read from the 'exec_name' entry of the
    .meta file that sits next to the minidump.  For now the package that
    would be reported against is only logged.

    @param host A host object that is where the dump came from
    @param minidump_path The path to the dump file that should be reported.
    """
    # TODO(milleral): Once this has actually been tested, remove the
    # try/except. In the meantime, let's make sure nothing dies because of
    # the fact that this code isn't very heavily tested.
    try:
        dump_base, _ = os.path.splitext(minidump_path)
        with open(dump_base + '.meta', 'r') as meta_file:
            for entry in meta_file:
                fields = entry.split('=')
                if fields[0] != 'exec_name':
                    continue
                # Only the first exec_name entry is considered.
                package = (find_package_of(host, fields[1].strip())
                           or '<unknown package>')
                logging.info('Would report crash on %s.', package)
                break
    except Exception as e:
        logging.warning('Crash detection failed with: %s', e)
400