// Copyright 2016 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// Package report contains functions that process kernel output,
// detect/extract crash messages, symbolize them, etc.
package report

import (
	"bufio"
	"bytes"
	"fmt"
	"regexp"
	"strings"

	"github.com/google/syzkaller/pkg/mgrconfig"
)

// Reporter detects, parses and symbolizes oops messages in kernel console output.
type Reporter interface {
	// ContainsCrash searches kernel console output for oops messages.
	ContainsCrash(output []byte) bool

	// Parse extracts information about oops from console output.
	// Returns nil if no oops found.
	Parse(output []byte) *Report

	// Symbolize symbolizes rep.Report and fills in Maintainers.
	Symbolize(rep *Report) error
}

// Report describes an oops found in console output by Reporter.Parse.
type Report struct {
	// Title contains a representative description of the first oops.
	Title string
	// Report contains whole oops text.
	Report []byte
	// Output contains whole raw console output as passed to Reporter.Parse.
	Output []byte
	// StartPos/EndPos denote region of output with oops message(s).
	StartPos int
	EndPos   int
	// Suppressed indicates whether the report should not be reported to user.
	Suppressed bool
	// Corrupted indicates whether the report is truncated or corrupted in some other way.
	Corrupted bool
	// CorruptedReason contains reason why the report is marked as corrupted.
	CorruptedReason string
	// Maintainers is list of maintainer emails.
	Maintainers []string
}

// NewReporter creates reporter for the specified OS/Type.
51func NewReporter(cfg *mgrconfig.Config) (Reporter, error) { 52 typ := cfg.TargetOS 53 if cfg.Type == "gvisor" { 54 typ = cfg.Type 55 } 56 ctor := ctors[typ] 57 if ctor == nil { 58 return nil, fmt.Errorf("unknown OS: %v", typ) 59 } 60 ignores, err := compileRegexps(cfg.Ignores) 61 if err != nil { 62 return nil, err 63 } 64 rep, suppressions, err := ctor(cfg.KernelSrc, cfg.KernelObj, ignores) 65 if err != nil { 66 return nil, err 67 } 68 supps, err := compileRegexps(append(suppressions, cfg.Suppressions...)) 69 if err != nil { 70 return nil, err 71 } 72 return &reporterWrapper{rep, supps}, nil 73} 74 75var ctors = map[string]fn{ 76 "akaros": ctorAkaros, 77 "linux": ctorLinux, 78 "gvisor": ctorGvisor, 79 "freebsd": ctorFreebsd, 80 "netbsd": ctorNetbsd, 81 "fuchsia": ctorFuchsia, 82 "windows": ctorStub, 83} 84 85type fn func(string, string, []*regexp.Regexp) (Reporter, []string, error) 86 87func compileRegexps(list []string) ([]*regexp.Regexp, error) { 88 compiled := make([]*regexp.Regexp, len(list)) 89 for i, str := range list { 90 re, err := regexp.Compile(str) 91 if err != nil { 92 return nil, fmt.Errorf("failed to compile %q: %v", str, err) 93 } 94 compiled[i] = re 95 } 96 return compiled, nil 97} 98 99type reporterWrapper struct { 100 Reporter 101 suppressions []*regexp.Regexp 102} 103 104func (wrap *reporterWrapper) Parse(output []byte) *Report { 105 rep := wrap.Reporter.Parse(output) 106 if rep == nil { 107 return nil 108 } 109 rep.Title = sanitizeTitle(replaceTable(dynamicTitleReplacement, rep.Title)) 110 rep.Suppressed = matchesAny(rep.Output, wrap.suppressions) 111 return rep 112} 113 114func IsSuppressed(reporter Reporter, output []byte) bool { 115 return matchesAny(output, reporter.(*reporterWrapper).suppressions) 116} 117 118type replacement struct { 119 match *regexp.Regexp 120 replacement string 121} 122 123func replaceTable(replacements []replacement, str string) string { 124 for _, repl := range replacements { 125 str = 
repl.match.ReplaceAllString(str, repl.replacement) 126 } 127 return str 128} 129 130var dynamicTitleReplacement = []replacement{ 131 { 132 // Executor PIDs are not interesting. 133 regexp.MustCompile(`syz-executor[0-9]+((/|:)[0-9]+)?`), 134 "syz-executor", 135 }, 136 { 137 // syzkaller binaries are coming from repro. 138 regexp.MustCompile(`syzkaller[0-9]+((/|:)[0-9]+)?`), 139 "syzkaller", 140 }, 141 { 142 // Replace that everything looks like an address with "ADDR", 143 // addresses in descriptions can't be good regardless of the oops regexps. 144 regexp.MustCompile(`([^a-zA-Z])(?:0x)?[0-9a-f]{6,}`), 145 "${1}ADDR", 146 }, 147 { 148 // Replace that everything looks like a decimal number with "NUM". 149 regexp.MustCompile(`([^a-zA-Z])[0-9]{5,}`), 150 "${1}NUM", 151 }, 152 { 153 // Replace that everything looks like a file line number with "LINE". 154 regexp.MustCompile(`(:[0-9]+)+`), 155 ":LINE", 156 }, 157 { 158 // Replace all raw references to runctions (e.g. "ip6_fragment+0x1052/0x2d80") 159 // with just function name ("ip6_fragment"). Offsets and sizes are not stable. 160 regexp.MustCompile(`([a-zA-Z][a-zA-Z0-9_.]+)\+0x[0-9a-z]+/0x[0-9a-z]+`), 161 "${1}", 162 }, 163 { 164 // CPU numbers are not interesting. 165 regexp.MustCompile(`CPU#[0-9]+`), 166 "CPU", 167 }, 168} 169 170func sanitizeTitle(title string) string { 171 const maxTitleLen = 120 // Corrupted/intermixed lines can be very long. 
172 res := make([]byte, 0, len(title)) 173 prev := byte(' ') 174 for i := 0; i < len(title) && i < maxTitleLen; i++ { 175 ch := title[i] 176 switch { 177 case ch == '\t': 178 ch = ' ' 179 case ch < 0x20 || ch >= 0x7f: 180 continue 181 } 182 if ch == ' ' && prev == ' ' { 183 continue 184 } 185 res = append(res, ch) 186 prev = ch 187 } 188 return strings.TrimSpace(string(res)) 189} 190 191type guilter interface { 192 extractGuiltyFile([]byte) string 193} 194 195func (wrap reporterWrapper) extractGuiltyFile(report []byte) string { 196 if g, ok := wrap.Reporter.(guilter); ok { 197 return g.extractGuiltyFile(report) 198 } 199 panic("not implemented") 200} 201 202type oops struct { 203 header []byte 204 formats []oopsFormat 205 suppressions []*regexp.Regexp 206} 207 208type oopsFormat struct { 209 title *regexp.Regexp 210 // If title is matched but report is not, the report is considered corrupted. 211 report *regexp.Regexp 212 // Format string to create report title. 213 // Strings captured by title (or by report if present) are passed as input. 214 // If stack is not nil, extracted function name is passed as an additional last argument. 215 fmt string 216 // If not nil, a function name is extracted from the report and passed to fmt. 217 // If not nil but frame extraction fails, the report is considered corrupted. 218 stack *stackFmt 219 noStackTrace bool 220 corrupted bool 221} 222 223type stackFmt struct { 224 // parts describe how guilty stack frame must be extracted from the report. 225 // parts are matched consecutively potentially capturing frames. 226 // parts can be of 3 types: 227 // - non-capturing regexp, matched against report and advances current position 228 // - capturing regexp, same as above, but also yields a frame 229 // - special value parseStackTrace means that a stack trace must be parsed 230 // starting from current position 231 parts []*regexp.Regexp 232 // If parts2 is present it is tried when parts matching fails. 
233 parts2 []*regexp.Regexp 234 // Skip these functions in stack traces (matched as substring). 235 skip []string 236} 237 238var parseStackTrace *regexp.Regexp 239 240func compile(re string) *regexp.Regexp { 241 re = strings.Replace(re, "{{ADDR}}", "0x[0-9a-f]+", -1) 242 re = strings.Replace(re, "{{PC}}", "\\[\\<(?:0x)?[0-9a-f]+\\>\\]", -1) 243 re = strings.Replace(re, "{{FUNC}}", "([a-zA-Z0-9_]+)(?:\\.|\\+)", -1) 244 re = strings.Replace(re, "{{SRC}}", "([a-zA-Z0-9-_/.]+\\.[a-z]+:[0-9]+)", -1) 245 return regexp.MustCompile(re) 246} 247 248func containsCrash(output []byte, oopses []*oops, ignores []*regexp.Regexp) bool { 249 for pos := 0; pos < len(output); { 250 next := bytes.IndexByte(output[pos:], '\n') 251 if next != -1 { 252 next += pos 253 } else { 254 next = len(output) 255 } 256 for _, oops := range oopses { 257 match := matchOops(output[pos:next], oops, ignores) 258 if match == -1 { 259 continue 260 } 261 return true 262 } 263 pos = next + 1 264 } 265 return false 266} 267 268func matchOops(line []byte, oops *oops, ignores []*regexp.Regexp) int { 269 match := bytes.Index(line, oops.header) 270 if match == -1 { 271 return -1 272 } 273 if matchesAny(line, oops.suppressions) { 274 return -1 275 } 276 if matchesAny(line, ignores) { 277 return -1 278 } 279 return match 280} 281 282func extractDescription(output []byte, oops *oops, params *stackParams) ( 283 desc string, corrupted string, format oopsFormat) { 284 startPos := len(output) 285 matchedTitle := false 286 for _, f := range oops.formats { 287 match := f.title.FindSubmatchIndex(output) 288 if match == nil || match[0] > startPos { 289 continue 290 } 291 if match[0] == startPos && desc != "" { 292 continue 293 } 294 if match[0] < startPos { 295 desc = "" 296 format = oopsFormat{} 297 startPos = match[0] 298 } 299 matchedTitle = true 300 if f.report != nil { 301 match = f.report.FindSubmatchIndex(output) 302 if match == nil { 303 continue 304 } 305 } 306 var args []interface{} 307 for i := 2; i < 
len(match); i += 2 { 308 args = append(args, string(output[match[i]:match[i+1]])) 309 } 310 corrupted = "" 311 if f.stack != nil { 312 frame := "" 313 frame, corrupted = extractStackFrame(params, f.stack, output[match[0]:]) 314 if frame == "" { 315 frame = "corrupted" 316 if corrupted == "" { 317 corrupted = "extracted no stack frame" 318 } 319 } 320 args = append(args, frame) 321 } 322 desc = fmt.Sprintf(f.fmt, args...) 323 format = f 324 } 325 if len(desc) == 0 { 326 // If we are here and matchedTitle is set, it means that we've matched 327 // a title of an oops but not full report regexp or stack trace, 328 // which means the report was corrupted. 329 if matchedTitle { 330 corrupted = "matched title but not report regexp" 331 } 332 pos := bytes.Index(output, oops.header) 333 if pos == -1 { 334 return 335 } 336 end := bytes.IndexByte(output[pos:], '\n') 337 if end == -1 { 338 end = len(output) 339 } else { 340 end += pos 341 } 342 desc = string(output[pos:end]) 343 } 344 if corrupted == "" && format.corrupted { 345 corrupted = "report format is marked as corrupted" 346 } 347 return 348} 349 350type stackParams struct { 351 // stackStartRes matches start of stack traces. 352 stackStartRes []*regexp.Regexp 353 // frameRes match different formats of lines containing kernel frames (capture function name). 354 frameRes []*regexp.Regexp 355 // skipPatterns match functions that must be unconditionally skipped. 356 skipPatterns []string 357 // If we looked at any lines that match corruptedLines during report analysis, 358 // then the report is marked as corrupted. 359 corruptedLines []*regexp.Regexp 360} 361 362func extractStackFrame(params *stackParams, stack *stackFmt, output []byte) (string, string) { 363 skip := append([]string{}, params.skipPatterns...) 364 skip = append(skip, stack.skip...) 
365 var skipRe *regexp.Regexp 366 if len(skip) != 0 { 367 skipRe = regexp.MustCompile(strings.Join(skip, "|")) 368 } 369 frame, corrupted := extractStackFrameImpl(params, output, skipRe, stack.parts) 370 if frame != "" || len(stack.parts2) == 0 { 371 return frame, corrupted 372 } 373 return extractStackFrameImpl(params, output, skipRe, stack.parts2) 374} 375 376func extractStackFrameImpl(params *stackParams, output []byte, skipRe *regexp.Regexp, 377 parts []*regexp.Regexp) (string, string) { 378 corrupted := "" 379 s := bufio.NewScanner(bytes.NewReader(output)) 380nextPart: 381 for _, part := range parts { 382 if part == parseStackTrace { 383 for s.Scan() { 384 ln := bytes.Trim(s.Bytes(), "\r") 385 if corrupted == "" && matchesAny(ln, params.corruptedLines) { 386 corrupted = "corrupted line in report (1)" 387 } 388 if matchesAny(ln, params.stackStartRes) { 389 continue nextPart 390 } 391 var match []int 392 for _, re := range params.frameRes { 393 match = re.FindSubmatchIndex(ln) 394 if match != nil { 395 break 396 } 397 } 398 if match == nil { 399 continue 400 } 401 frame := ln[match[2]:match[3]] 402 if skipRe == nil || !skipRe.Match(frame) { 403 return string(frame), corrupted 404 } 405 } 406 } else { 407 for s.Scan() { 408 ln := bytes.Trim(s.Bytes(), "\r") 409 if corrupted == "" && matchesAny(ln, params.corruptedLines) { 410 corrupted = "corrupted line in report (2)" 411 } 412 match := part.FindSubmatchIndex(ln) 413 if match == nil { 414 continue 415 } 416 if len(match) == 4 && match[2] != -1 { 417 frame := ln[match[2]:match[3]] 418 if skipRe == nil || !skipRe.Match(frame) { 419 return string(frame), corrupted 420 } 421 } 422 break 423 } 424 } 425 } 426 return "", corrupted 427} 428 429func simpleLineParser(output []byte, oopses []*oops, params *stackParams, ignores []*regexp.Regexp) *Report { 430 rep := &Report{ 431 Output: output, 432 } 433 var oops *oops 434 for pos := 0; pos < len(output); { 435 next := bytes.IndexByte(output[pos:], '\n') 436 if next != -1 { 
437 next += pos 438 } else { 439 next = len(output) 440 } 441 line := output[pos:next] 442 for _, oops1 := range oopses { 443 match := matchOops(line, oops1, ignores) 444 if match != -1 { 445 oops = oops1 446 rep.StartPos = pos 447 break 448 } 449 } 450 if oops != nil { 451 break 452 } 453 pos = next + 1 454 } 455 if oops == nil { 456 return nil 457 } 458 title, corrupted, _ := extractDescription(output[rep.StartPos:], oops, params) 459 rep.Title = title 460 rep.Report = output[rep.StartPos:] 461 rep.Corrupted = corrupted != "" 462 rep.CorruptedReason = corrupted 463 return rep 464} 465 466func matchesAny(line []byte, res []*regexp.Regexp) bool { 467 for _, re := range res { 468 if re.Match(line) { 469 return true 470 } 471 } 472 return false 473} 474 475// replace replaces [start:end] in where with what, inplace. 476func replace(where []byte, start, end int, what []byte) []byte { 477 if len(what) >= end-start { 478 where = append(where, what[end-start:]...) 479 copy(where[start+len(what):], where[end:]) 480 copy(where[start:], what) 481 } else { 482 copy(where[start+len(what):], where[end:]) 483 where = where[:len(where)-(end-start-len(what))] 484 copy(where[start:], what) 485 } 486 return where 487} 488 489var ( 490 filenameRe = regexp.MustCompile(`[a-zA-Z0-9_\-\./]*[a-zA-Z0-9_\-]+\.(c|h):[0-9]+`) 491) 492