// Copyright 2016 syzkaller project authors. All rights reserved. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. // Package report contains functions that process kernel output, // detect/extract crash messages, symbolize them, etc. package report import ( "bufio" "bytes" "fmt" "regexp" "strings" "github.com/google/syzkaller/pkg/mgrconfig" ) type Reporter interface { // ContainsCrash searches kernel console output for oops messages. ContainsCrash(output []byte) bool // Parse extracts information about oops from console output. // Returns nil if no oops found. Parse(output []byte) *Report // Symbolize symbolizes rep.Report and fills in Maintainers. Symbolize(rep *Report) error } type Report struct { // Title contains a representative description of the first oops. Title string // Report contains whole oops text. Report []byte // Output contains whole raw console output as passed to Reporter.Parse. Output []byte // StartPos/EndPos denote region of output with oops message(s). StartPos int EndPos int // Suppressed indicates whether the report should not be reported to user. Suppressed bool // Corrupted indicates whether the report is truncated of corrupted in some other way. Corrupted bool // CorruptedReason contains reason why the report is marked as corrupted. CorruptedReason string // Maintainers is list of maintainer emails. Maintainers []string } // NewReporter creates reporter for the specified OS/Type. func NewReporter(cfg *mgrconfig.Config) (Reporter, error) { typ := cfg.TargetOS if cfg.Type == "gvisor" { typ = cfg.Type } ctor := ctors[typ] if ctor == nil { return nil, fmt.Errorf("unknown OS: %v", typ) } ignores, err := compileRegexps(cfg.Ignores) if err != nil { return nil, err } rep, suppressions, err := ctor(cfg.KernelSrc, cfg.KernelObj, ignores) if err != nil { return nil, err } supps, err := compileRegexps(append(suppressions, cfg.Suppressions...)) if err != nil { return nil, err } return &reporterWrapper{rep, supps}, nil } var ctors = map[string]fn{ "akaros": ctorAkaros, "linux": ctorLinux, "gvisor": ctorGvisor, "freebsd": ctorFreebsd, "netbsd": ctorNetbsd, "fuchsia": ctorFuchsia, "windows": ctorStub, } type fn func(string, string, []*regexp.Regexp) (Reporter, []string, error) func compileRegexps(list []string) ([]*regexp.Regexp, error) { compiled := make([]*regexp.Regexp, len(list)) for i, str := range list { re, err := regexp.Compile(str) if err != nil { return nil, fmt.Errorf("failed to compile %q: %v", str, err) } compiled[i] = re } return compiled, nil } type reporterWrapper struct { Reporter suppressions []*regexp.Regexp } func (wrap *reporterWrapper) Parse(output []byte) *Report { rep := wrap.Reporter.Parse(output) if rep == nil { return nil } rep.Title = sanitizeTitle(replaceTable(dynamicTitleReplacement, rep.Title)) rep.Suppressed = matchesAny(rep.Output, wrap.suppressions) return rep } func IsSuppressed(reporter Reporter, output []byte) bool { return matchesAny(output, reporter.(*reporterWrapper).suppressions) } type replacement struct { match *regexp.Regexp replacement string } func replaceTable(replacements []replacement, str string) string { for _, repl := range replacements { str = repl.match.ReplaceAllString(str, repl.replacement) } return str } var dynamicTitleReplacement = []replacement{ { // Executor PIDs are not interesting. regexp.MustCompile(`syz-executor[0-9]+((/|:)[0-9]+)?`), "syz-executor", }, { // syzkaller binaries are coming from repro. regexp.MustCompile(`syzkaller[0-9]+((/|:)[0-9]+)?`), "syzkaller", }, { // Replace that everything looks like an address with "ADDR", // addresses in descriptions can't be good regardless of the oops regexps. regexp.MustCompile(`([^a-zA-Z])(?:0x)?[0-9a-f]{6,}`), "${1}ADDR", }, { // Replace that everything looks like a decimal number with "NUM". regexp.MustCompile(`([^a-zA-Z])[0-9]{5,}`), "${1}NUM", }, { // Replace that everything looks like a file line number with "LINE". regexp.MustCompile(`(:[0-9]+)+`), ":LINE", }, { // Replace all raw references to runctions (e.g. "ip6_fragment+0x1052/0x2d80") // with just function name ("ip6_fragment"). Offsets and sizes are not stable. regexp.MustCompile(`([a-zA-Z][a-zA-Z0-9_.]+)\+0x[0-9a-z]+/0x[0-9a-z]+`), "${1}", }, { // CPU numbers are not interesting. regexp.MustCompile(`CPU#[0-9]+`), "CPU", }, } func sanitizeTitle(title string) string { const maxTitleLen = 120 // Corrupted/intermixed lines can be very long. res := make([]byte, 0, len(title)) prev := byte(' ') for i := 0; i < len(title) && i < maxTitleLen; i++ { ch := title[i] switch { case ch == '\t': ch = ' ' case ch < 0x20 || ch >= 0x7f: continue } if ch == ' ' && prev == ' ' { continue } res = append(res, ch) prev = ch } return strings.TrimSpace(string(res)) } type guilter interface { extractGuiltyFile([]byte) string } func (wrap reporterWrapper) extractGuiltyFile(report []byte) string { if g, ok := wrap.Reporter.(guilter); ok { return g.extractGuiltyFile(report) } panic("not implemented") } type oops struct { header []byte formats []oopsFormat suppressions []*regexp.Regexp } type oopsFormat struct { title *regexp.Regexp // If title is matched but report is not, the report is considered corrupted. report *regexp.Regexp // Format string to create report title. // Strings captured by title (or by report if present) are passed as input. // If stack is not nil, extracted function name is passed as an additional last argument. fmt string // If not nil, a function name is extracted from the report and passed to fmt. // If not nil but frame extraction fails, the report is considered corrupted. stack *stackFmt noStackTrace bool corrupted bool } type stackFmt struct { // parts describe how guilty stack frame must be extracted from the report. // parts are matched consecutively potentially capturing frames. // parts can be of 3 types: // - non-capturing regexp, matched against report and advances current position // - capturing regexp, same as above, but also yields a frame // - special value parseStackTrace means that a stack trace must be parsed // starting from current position parts []*regexp.Regexp // If parts2 is present it is tried when parts matching fails. parts2 []*regexp.Regexp // Skip these functions in stack traces (matched as substring). skip []string } var parseStackTrace *regexp.Regexp func compile(re string) *regexp.Regexp { re = strings.Replace(re, "{{ADDR}}", "0x[0-9a-f]+", -1) re = strings.Replace(re, "{{PC}}", "\\[\\<(?:0x)?[0-9a-f]+\\>\\]", -1) re = strings.Replace(re, "{{FUNC}}", "([a-zA-Z0-9_]+)(?:\\.|\\+)", -1) re = strings.Replace(re, "{{SRC}}", "([a-zA-Z0-9-_/.]+\\.[a-z]+:[0-9]+)", -1) return regexp.MustCompile(re) } func containsCrash(output []byte, oopses []*oops, ignores []*regexp.Regexp) bool { for pos := 0; pos < len(output); { next := bytes.IndexByte(output[pos:], '\n') if next != -1 { next += pos } else { next = len(output) } for _, oops := range oopses { match := matchOops(output[pos:next], oops, ignores) if match == -1 { continue } return true } pos = next + 1 } return false } func matchOops(line []byte, oops *oops, ignores []*regexp.Regexp) int { match := bytes.Index(line, oops.header) if match == -1 { return -1 } if matchesAny(line, oops.suppressions) { return -1 } if matchesAny(line, ignores) { return -1 } return match } func extractDescription(output []byte, oops *oops, params *stackParams) ( desc string, corrupted string, format oopsFormat) { startPos := len(output) matchedTitle := false for _, f := range oops.formats { match := f.title.FindSubmatchIndex(output) if match == nil || match[0] > startPos { continue } if match[0] == startPos && desc != "" { continue } if match[0] < startPos { desc = "" format = oopsFormat{} startPos = match[0] } matchedTitle = true if f.report != nil { match = f.report.FindSubmatchIndex(output) if match == nil { continue } } var args []interface{} for i := 2; i < len(match); i += 2 { args = append(args, string(output[match[i]:match[i+1]])) } corrupted = "" if f.stack != nil { frame := "" frame, corrupted = extractStackFrame(params, f.stack, output[match[0]:]) if frame == "" { frame = "corrupted" if corrupted == "" { corrupted = "extracted no stack frame" } } args = append(args, frame) } desc = fmt.Sprintf(f.fmt, args...) format = f } if len(desc) == 0 { // If we are here and matchedTitle is set, it means that we've matched // a title of an oops but not full report regexp or stack trace, // which means the report was corrupted. if matchedTitle { corrupted = "matched title but not report regexp" } pos := bytes.Index(output, oops.header) if pos == -1 { return } end := bytes.IndexByte(output[pos:], '\n') if end == -1 { end = len(output) } else { end += pos } desc = string(output[pos:end]) } if corrupted == "" && format.corrupted { corrupted = "report format is marked as corrupted" } return } type stackParams struct { // stackStartRes matches start of stack traces. stackStartRes []*regexp.Regexp // frameRes match different formats of lines containing kernel frames (capture function name). frameRes []*regexp.Regexp // skipPatterns match functions that must be unconditionally skipped. skipPatterns []string // If we looked at any lines that match corruptedLines during report analysis, // then the report is marked as corrupted. corruptedLines []*regexp.Regexp } func extractStackFrame(params *stackParams, stack *stackFmt, output []byte) (string, string) { skip := append([]string{}, params.skipPatterns...) skip = append(skip, stack.skip...) var skipRe *regexp.Regexp if len(skip) != 0 { skipRe = regexp.MustCompile(strings.Join(skip, "|")) } frame, corrupted := extractStackFrameImpl(params, output, skipRe, stack.parts) if frame != "" || len(stack.parts2) == 0 { return frame, corrupted } return extractStackFrameImpl(params, output, skipRe, stack.parts2) } func extractStackFrameImpl(params *stackParams, output []byte, skipRe *regexp.Regexp, parts []*regexp.Regexp) (string, string) { corrupted := "" s := bufio.NewScanner(bytes.NewReader(output)) nextPart: for _, part := range parts { if part == parseStackTrace { for s.Scan() { ln := bytes.Trim(s.Bytes(), "\r") if corrupted == "" && matchesAny(ln, params.corruptedLines) { corrupted = "corrupted line in report (1)" } if matchesAny(ln, params.stackStartRes) { continue nextPart } var match []int for _, re := range params.frameRes { match = re.FindSubmatchIndex(ln) if match != nil { break } } if match == nil { continue } frame := ln[match[2]:match[3]] if skipRe == nil || !skipRe.Match(frame) { return string(frame), corrupted } } } else { for s.Scan() { ln := bytes.Trim(s.Bytes(), "\r") if corrupted == "" && matchesAny(ln, params.corruptedLines) { corrupted = "corrupted line in report (2)" } match := part.FindSubmatchIndex(ln) if match == nil { continue } if len(match) == 4 && match[2] != -1 { frame := ln[match[2]:match[3]] if skipRe == nil || !skipRe.Match(frame) { return string(frame), corrupted } } break } } } return "", corrupted } func simpleLineParser(output []byte, oopses []*oops, params *stackParams, ignores []*regexp.Regexp) *Report { rep := &Report{ Output: output, } var oops *oops for pos := 0; pos < len(output); { next := bytes.IndexByte(output[pos:], '\n') if next != -1 { next += pos } else { next = len(output) } line := output[pos:next] for _, oops1 := range oopses { match := matchOops(line, oops1, ignores) if match != -1 { oops = oops1 rep.StartPos = pos break } } if oops != nil { break } pos = next + 1 } if oops == nil { return nil } title, corrupted, _ := extractDescription(output[rep.StartPos:], oops, params) rep.Title = title rep.Report = output[rep.StartPos:] rep.Corrupted = corrupted != "" rep.CorruptedReason = corrupted return rep } func matchesAny(line []byte, res []*regexp.Regexp) bool { for _, re := range res { if re.Match(line) { return true } } return false } // replace replaces [start:end] in where with what, inplace. func replace(where []byte, start, end int, what []byte) []byte { if len(what) >= end-start { where = append(where, what[end-start:]...) copy(where[start+len(what):], where[end:]) copy(where[start:], what) } else { copy(where[start+len(what):], where[end:]) where = where[:len(where)-(end-start-len(what))] copy(where[start:], what) } return where } var ( filenameRe = regexp.MustCompile(`[a-zA-Z0-9_\-\./]*[a-zA-Z0-9_\-]+\.(c|h):[0-9]+`) )