// Copyright 2015 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package main

import (
	"bytes"
	"encoding/json"
	"flag"
	"fmt"
	"math/rand"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/syzkaller/dashboard/dashapi"
	"github.com/google/syzkaller/pkg/cover"
	"github.com/google/syzkaller/pkg/csource"
	"github.com/google/syzkaller/pkg/db"
	"github.com/google/syzkaller/pkg/gce"
	"github.com/google/syzkaller/pkg/hash"
	"github.com/google/syzkaller/pkg/instance"
	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/mgrconfig"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/report"
	"github.com/google/syzkaller/pkg/repro"
	"github.com/google/syzkaller/pkg/rpctype"
	"github.com/google/syzkaller/pkg/signal"
	"github.com/google/syzkaller/prog"
	"github.com/google/syzkaller/sys"
	"github.com/google/syzkaller/vm"
)

var (
	flagConfig = flag.String("config", "", "configuration file")
	flagDebug  = flag.Bool("debug", false, "dump all VM output to console")
	flagBench  = flag.String("bench", "", "write execution statistics into this file periodically")
)

type Manager struct {
	cfg            *mgrconfig.Config
	vmPool         *vm.Pool
	target         *prog.Target
	reporter       report.Reporter
	crashdir       string
	port           int
	corpusDB       *db.DB
	startTime      time.Time
	firstConnect   time.Time
	fuzzingTime    time.Duration
	stats          *Stats
	fuzzerStats    map[string]uint64
	crashTypes     map[string]bool
	vmStop         chan bool
	checkResult    *rpctype.CheckArgs
	fresh          bool
	numFuzzing     uint32
	numReproducing uint32

	dash *dashapi.Dashboard

	mu              sync.Mutex
	phase           int
	enabledSyscalls []int

	candidates     []rpctype.RPCCandidate // untriaged inputs from corpus and hub
	disabledHashes map[string]struct{}
	corpus         map[string]rpctype.RPCInput
	corpusCover    cover.Cover
	corpusSignal   signal.Signal
	maxSignal      signal.Signal
	prios          [][]float32
	newRepros      [][]byte

	fuzzers        map[string]*Fuzzer
	needMoreRepros chan chan bool
	hubReproQueue  chan *Crash
	reproRequest   chan chan map[string]bool

	// For checking that files that we are using are not changing under us.
	// Maps file name to modification time.
	usedFiles map[string]time.Time
}

const (
	// Just started, nothing done yet.
	phaseInit = iota
	// Corpus is loaded and machine is checked.
	phaseLoadedCorpus
	// Triaged all inputs from corpus.
	// This is when we start querying hub and minimizing persistent corpus.
	phaseTriagedCorpus
	// Done the first request to hub.
	phaseQueriedHub
	// Triaged all new inputs from hub.
	// This is when we start reproducing crashes.
	phaseTriagedHub
)

const currentDBVersion = 3

type Fuzzer struct {
	name         string
	inputs       []rpctype.RPCInput
	newMaxSignal signal.Signal
}

type Crash struct {
	vmIndex int
	hub     bool // this crash was created based on a repro from hub
	*report.Report
}

func main() {
	if sys.GitRevision == "" {
		log.Fatalf("Bad syz-manager build. Build with make, run bin/syz-manager.")
	}
	flag.Parse()
	log.EnableLogCaching(1000, 1<<20)
	cfg, err := mgrconfig.LoadFile(*flagConfig)
	if err != nil {
		log.Fatalf("%v", err)
	}
	target, err := prog.GetTarget(cfg.TargetOS, cfg.TargetArch)
	if err != nil {
		log.Fatalf("%v", err)
	}
	syscalls, err := mgrconfig.ParseEnabledSyscalls(target, cfg.EnabledSyscalls, cfg.DisabledSyscalls)
	if err != nil {
		log.Fatalf("%v", err)
	}
	RunManager(cfg, target, syscalls)
}

func RunManager(cfg *mgrconfig.Config, target *prog.Target, syscalls map[int]bool) {
	var vmPool *vm.Pool
	// Type "none" is a special case for debugging/development when manager
	// does not start any VMs, but instead you start them manually
	// and start syz-fuzzer there.
	if cfg.Type != "none" {
		var err error
		vmPool, err = vm.Create(cfg, *flagDebug)
		if err != nil {
			log.Fatalf("%v", err)
		}
	}

	crashdir := filepath.Join(cfg.Workdir, "crashes")
	osutil.MkdirAll(crashdir)

	var enabledSyscalls []int
	for c := range syscalls {
		enabledSyscalls = append(enabledSyscalls, c)
	}

	reporter, err := report.NewReporter(cfg)
	if err != nil {
		log.Fatalf("%v", err)
	}

	mgr := &Manager{
		cfg:             cfg,
		vmPool:          vmPool,
		target:          target,
		reporter:        reporter,
		crashdir:        crashdir,
		startTime:       time.Now(),
		stats:           new(Stats),
		fuzzerStats:     make(map[string]uint64),
		crashTypes:      make(map[string]bool),
		enabledSyscalls: enabledSyscalls,
		corpus:          make(map[string]rpctype.RPCInput),
		disabledHashes:  make(map[string]struct{}),
		fuzzers:         make(map[string]*Fuzzer),
		fresh:           true,
		vmStop:          make(chan bool),
		hubReproQueue:   make(chan *Crash, 10),
		needMoreRepros:  make(chan chan bool),
		reproRequest:    make(chan chan map[string]bool),
		usedFiles:       make(map[string]time.Time),
	}

	log.Logf(0, "loading corpus...")
	mgr.corpusDB, err = db.Open(filepath.Join(cfg.Workdir, "corpus.db"))
	if err != nil {
		log.Fatalf("failed to open corpus database: %v", err)
	}

	// Create HTTP server.
	mgr.initHTTP()
	mgr.collectUsedFiles()

	// Create RPC server for fuzzers.
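	// The syz-fuzzer processes inside the VMs connect back to this endpoint;
	// runInstance forwards the server port into each VM so they can reach it.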
	s, err := rpctype.NewRPCServer(cfg.RPC, mgr)
	if err != nil {
		log.Fatalf("failed to create rpc server: %v", err)
	}
	log.Logf(0, "serving rpc on tcp://%v", s.Addr())
	mgr.port = s.Addr().(*net.TCPAddr).Port
	go s.Serve()

	if cfg.DashboardAddr != "" {
		mgr.dash = dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey)
	}

	go func() {
		for lastTime := time.Now(); ; {
			time.Sleep(10 * time.Second)
			now := time.Now()
			diff := now.Sub(lastTime)
			lastTime = now
			mgr.mu.Lock()
			if mgr.firstConnect.IsZero() {
				mgr.mu.Unlock()
				continue
			}
			mgr.fuzzingTime += diff * time.Duration(atomic.LoadUint32(&mgr.numFuzzing))
			executed := mgr.stats.execTotal.get()
			crashes := mgr.stats.crashes.get()
			signal := mgr.corpusSignal.Len()
			mgr.mu.Unlock()
			numReproducing := atomic.LoadUint32(&mgr.numReproducing)
			numFuzzing := atomic.LoadUint32(&mgr.numFuzzing)

			log.Logf(0, "VMs %v, executed %v, cover %v, crashes %v, repro %v",
				numFuzzing, executed, signal, crashes, numReproducing)
		}
	}()

	if *flagBench != "" {
		f, err := os.OpenFile(*flagBench, os.O_WRONLY|os.O_CREATE|os.O_EXCL, osutil.DefaultFilePerm)
		if err != nil {
			log.Fatalf("failed to open bench file: %v", err)
		}
		go func() {
			for {
				time.Sleep(time.Minute)
				vals := make(map[string]uint64)
				mgr.mu.Lock()
				if mgr.firstConnect.IsZero() {
					mgr.mu.Unlock()
					continue
				}
				mgr.minimizeCorpus()
				vals["corpus"] = uint64(len(mgr.corpus))
				vals["uptime"] = uint64(time.Since(mgr.firstConnect)) / 1e9
				vals["fuzzing"] = uint64(mgr.fuzzingTime) / 1e9
				vals["signal"] = uint64(mgr.corpusSignal.Len())
				vals["coverage"] = uint64(len(mgr.corpusCover))
				for k, v := range mgr.fuzzerStats {
					vals[k] = v
				}
				mgr.mu.Unlock()
				for k, v := range mgr.stats.all() {
					vals[k] = v
				}

				data, err := json.MarshalIndent(vals, "", " ")
				if err != nil {
					log.Fatalf("failed to serialize bench data")
				}
				if _, err := f.Write(append(data, '\n')); err != nil {
					log.Fatalf("failed to write bench data")
				}
			}
		}()
	}

	if mgr.dash != nil {
		go mgr.dashboardReporter()
	}

	osutil.HandleInterrupts(vm.Shutdown)
	if mgr.vmPool == nil {
		log.Logf(0, "no VMs started (type=none)")
		log.Logf(0, "you are supposed to start syz-fuzzer manually as:")
		log.Logf(0, "syz-fuzzer -manager=manager.ip:%v [other flags as necessary]", mgr.port)
		<-vm.Shutdown
		return
	}
	mgr.vmLoop()
}

type RunResult struct {
	idx   int
	crash *Crash
	err   error
}

type ReproResult struct {
	instances []int
	title0    string
	res       *repro.Result
	stats     *repro.Stats
	err       error
	hub       bool // repro came from hub
}

// Manager needs to be refactored (#605).
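// vmLoop is the central scheduler: it hands idle VM instances either to regular
// fuzzing runs (runInstance) or to crash reproduction (repro.Run), and it only
// starts reproducing once corpus and hub inputs have been triaged
// (phase >= phaseTriagedHub).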
// nolint: gocyclo
func (mgr *Manager) vmLoop() {
	log.Logf(0, "booting test machines...")
	log.Logf(0, "wait for the connection from test machine...")
	instancesPerRepro := 4
	vmCount := mgr.vmPool.Count()
	if instancesPerRepro > vmCount {
		instancesPerRepro = vmCount
	}
	instances := make([]int, vmCount)
	for i := range instances {
		instances[i] = vmCount - i - 1
	}
	runDone := make(chan *RunResult, 1)
	pendingRepro := make(map[*Crash]bool)
	reproducing := make(map[string]bool)
	reproInstances := 0
	var reproQueue []*Crash
	reproDone := make(chan *ReproResult, 1)
	stopPending := false
	shutdown := vm.Shutdown
	for shutdown != nil || len(instances) != vmCount {
		mgr.mu.Lock()
		phase := mgr.phase
		mgr.mu.Unlock()

		for crash := range pendingRepro {
			if reproducing[crash.Title] {
				continue
			}
			delete(pendingRepro, crash)
			if !mgr.needRepro(crash) {
				continue
			}
			log.Logf(1, "loop: add to repro queue '%v'", crash.Title)
			reproducing[crash.Title] = true
			reproQueue = append(reproQueue, crash)
		}

		log.Logf(1, "loop: phase=%v shutdown=%v instances=%v/%v %+v repro: pending=%v reproducing=%v queued=%v",
			phase, shutdown == nil, len(instances), vmCount, instances,
			len(pendingRepro), len(reproducing), len(reproQueue))

		canRepro := func() bool {
			return phase >= phaseTriagedHub &&
				len(reproQueue) != 0 && reproInstances+instancesPerRepro <= vmCount
		}

		if shutdown != nil {
			for canRepro() && len(instances) >= instancesPerRepro {
				last := len(reproQueue) - 1
				crash := reproQueue[last]
				reproQueue[last] = nil
				reproQueue = reproQueue[:last]
				vmIndexes := append([]int{}, instances[len(instances)-instancesPerRepro:]...)
				instances = instances[:len(instances)-instancesPerRepro]
				reproInstances += instancesPerRepro
				atomic.AddUint32(&mgr.numReproducing, 1)
				log.Logf(1, "loop: starting repro of '%v' on instances %+v", crash.Title, vmIndexes)
				go func() {
					res, stats, err := repro.Run(crash.Output, mgr.cfg, mgr.reporter, mgr.vmPool, vmIndexes)
					reproDone <- &ReproResult{vmIndexes, crash.Title, res, stats, err, crash.hub}
				}()
			}
			for !canRepro() && len(instances) != 0 {
				last := len(instances) - 1
				idx := instances[last]
				instances = instances[:last]
				log.Logf(1, "loop: starting instance %v", idx)
				go func() {
					crash, err := mgr.runInstance(idx)
					runDone <- &RunResult{idx, crash, err}
				}()
			}
		}

		var stopRequest chan bool
		if !stopPending && canRepro() {
			stopRequest = mgr.vmStop
		}

	wait:
		select {
		case stopRequest <- true:
			log.Logf(1, "loop: issued stop request")
			stopPending = true
		case res := <-runDone:
			log.Logf(1, "loop: instance %v finished, crash=%v", res.idx, res.crash != nil)
			if res.err != nil && shutdown != nil {
				log.Logf(0, "%v", res.err)
			}
			stopPending = false
			instances = append(instances, res.idx)
			// On shutdown qemu crashes with "qemu: terminating on signal 2",
			// which we detect as "lost connection". Don't save that as a crash.
			if shutdown != nil && res.crash != nil {
				needRepro := mgr.saveCrash(res.crash)
				if needRepro {
					log.Logf(1, "loop: add pending repro for '%v'", res.crash.Title)
					pendingRepro[res.crash] = true
				}
			}
		case res := <-reproDone:
			atomic.AddUint32(&mgr.numReproducing, ^uint32(0))
			crepro := false
			title := ""
			if res.res != nil {
				crepro = res.res.CRepro
				title = res.res.Report.Title
			}
			log.Logf(1, "loop: repro on %+v finished '%v', repro=%v crepro=%v desc='%v'",
				res.instances, res.title0, res.res != nil, crepro, title)
			if res.err != nil {
				log.Logf(0, "repro failed: %v", res.err)
			}
			delete(reproducing, res.title0)
			instances = append(instances, res.instances...)
			reproInstances -= instancesPerRepro
			if res.res == nil {
				if !res.hub {
					mgr.saveFailedRepro(res.title0, res.stats)
				}
			} else {
				mgr.saveRepro(res.res, res.stats, res.hub)
			}
		case <-shutdown:
			log.Logf(1, "loop: shutting down...")
			shutdown = nil
		case crash := <-mgr.hubReproQueue:
			log.Logf(1, "loop: get repro from hub")
			pendingRepro[crash] = true
		case reply := <-mgr.needMoreRepros:
			reply <- phase >= phaseTriagedHub &&
				len(reproQueue)+len(pendingRepro)+len(reproducing) == 0
			goto wait
		case reply := <-mgr.reproRequest:
			repros := make(map[string]bool)
			for title := range reproducing {
				repros[title] = true
			}
			reply <- repros
			goto wait
		}
	}
}

func (mgr *Manager) loadCorpus() {
	// By default we don't re-minimize/re-smash programs from the corpus:
	// it takes a lot of time on start and is unnecessary.
	// However, on version bumps we can selectively re-minimize/re-smash.
	minimized, smashed := true, true
	switch mgr.corpusDB.Version {
	case 0:
		// Version 0 had broken minimization, so we need to re-minimize.
		minimized = false
		fallthrough
	case 1:
		// Version 1->2: memory is preallocated so lots of mmaps become unnecessary.
		minimized = false
		fallthrough
	case 2:
		// Version 2->3: big-endian hints.
		smashed = false
		fallthrough
	case currentDBVersion:
	}
	syscalls := make(map[int]bool)
	for _, id := range mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox] {
		syscalls[id] = true
	}
	deleted := 0
	for key, rec := range mgr.corpusDB.Records {
		p, err := mgr.target.Deserialize(rec.Val)
		if err != nil {
			if deleted < 10 {
				log.Logf(0, "deleting broken program: %v\n%s", err, rec.Val)
			}
			mgr.corpusDB.Delete(key)
			deleted++
			continue
		}
		disabled := false
		for _, c := range p.Calls {
			if !syscalls[c.Meta.ID] {
				disabled = true
				break
			}
		}
		if disabled {
			// This program contains a disabled syscall.
			// We won't execute it, but remember its hash so
			// it is not deleted during minimization.
			mgr.disabledHashes[hash.String(rec.Val)] = struct{}{}
			continue
		}
		mgr.candidates = append(mgr.candidates, rpctype.RPCCandidate{
			Prog:      rec.Val,
			Minimized: minimized,
			Smashed:   smashed,
		})
	}
	mgr.fresh = len(mgr.corpusDB.Records) == 0
	log.Logf(0, "%-24v: %v (%v deleted)", "corpus", len(mgr.candidates), deleted)

	// Now this is ugly.
	// We duplicate all inputs in the corpus and shuffle the second part.
	// This solves the following problem. A fuzzer can crash while triaging candidates,
	// in which case it will also lose all cached candidates.
	// Or, an input can be somewhat flaky and not give its coverage on the first try,
	// so we give each input a second chance.
	// Shuffling should alleviate deterministically losing the same inputs when a fuzzer crashes.
	mgr.candidates = append(mgr.candidates, mgr.candidates...)
	shuffle := mgr.candidates[len(mgr.candidates)/2:]
	for i := range shuffle {
		j := i + rand.Intn(len(shuffle)-i)
		shuffle[i], shuffle[j] = shuffle[j], shuffle[i]
	}
	if mgr.phase != phaseInit {
		panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase))
	}
	mgr.phase = phaseLoadedCorpus
}

func (mgr *Manager) runInstance(index int) (*Crash, error) {
	mgr.checkUsedFiles()
	inst, err := mgr.vmPool.Create(index)
	if err != nil {
		return nil, fmt.Errorf("failed to create instance: %v", err)
	}
	defer inst.Close()

	fwdAddr, err := inst.Forward(mgr.port)
	if err != nil {
		return nil, fmt.Errorf("failed to setup port forwarding: %v", err)
	}
	fuzzerBin, err := inst.Copy(mgr.cfg.SyzFuzzerBin)
	if err != nil {
		return nil, fmt.Errorf("failed to copy binary: %v", err)
	}
	executorBin, err := inst.Copy(mgr.cfg.SyzExecutorBin)
	if err != nil {
		return nil, fmt.Errorf("failed to copy binary: %v", err)
	}

	fuzzerV := 0
	procs := mgr.cfg.Procs
	if *flagDebug {
		fuzzerV = 100
		procs = 1
	}

	// Run the fuzzer binary.
	start := time.Now()
	atomic.AddUint32(&mgr.numFuzzing, 1)
	defer atomic.AddUint32(&mgr.numFuzzing, ^uint32(0))
	cmd := instance.FuzzerCmd(fuzzerBin, executorBin, fmt.Sprintf("vm-%v", index),
		mgr.cfg.TargetOS, mgr.cfg.TargetArch, fwdAddr, mgr.cfg.Sandbox, procs, fuzzerV,
		mgr.cfg.Cover, *flagDebug, false, false)
	outc, errc, err := inst.Run(time.Hour, mgr.vmStop, cmd)
	if err != nil {
		return nil, fmt.Errorf("failed to run fuzzer: %v", err)
	}

	rep := inst.MonitorExecution(outc, errc, mgr.reporter, false)
	if rep == nil {
		// This is the only "OK" outcome.
		log.Logf(0, "vm-%v: running for %v, restarting", index, time.Since(start))
		return nil, nil
	}
	crash := &Crash{
		vmIndex: index,
		hub:     false,
		Report:  rep,
	}
	return crash, nil
}

func (mgr *Manager) emailCrash(crash *Crash) {
	if len(mgr.cfg.EmailAddrs) == 0 {
		return
	}
	args := []string{"-s", "syzkaller: " + crash.Title}
	args = append(args, mgr.cfg.EmailAddrs...)
	log.Logf(0, "sending email to %v", mgr.cfg.EmailAddrs)

	cmd := exec.Command("mailx", args...)
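	// The crash report is fed to mailx on stdin; osutil.Run enforces a 10 minute timeout.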
	cmd.Stdin = bytes.NewReader(crash.Report.Report)
	if _, err := osutil.Run(10*time.Minute, cmd); err != nil {
		log.Logf(0, "failed to send email: %v", err)
	}
}

func (mgr *Manager) saveCrash(crash *Crash) bool {
	if crash.Suppressed {
		log.Logf(0, "vm-%v: suppressed crash %v", crash.vmIndex, crash.Title)
		mgr.stats.crashSuppressed.inc()
		return false
	}
	corrupted := ""
	if crash.Corrupted {
		corrupted = " [corrupted]"
	}
	log.Logf(0, "vm-%v: crash: %v%v", crash.vmIndex, crash.Title, corrupted)
	if err := mgr.reporter.Symbolize(crash.Report); err != nil {
		log.Logf(0, "failed to symbolize report: %v", err)
	}

	mgr.stats.crashes.inc()
	mgr.mu.Lock()
	if !mgr.crashTypes[crash.Title] {
		mgr.crashTypes[crash.Title] = true
		mgr.stats.crashTypes.inc()
	}
	mgr.mu.Unlock()

	if mgr.dash != nil {
		dc := &dashapi.Crash{
			BuildID:     mgr.cfg.Tag,
			Title:       crash.Title,
			Corrupted:   crash.Corrupted,
			Maintainers: crash.Maintainers,
			Log:         crash.Output,
			Report:      crash.Report.Report,
		}
		resp, err := mgr.dash.ReportCrash(dc)
		if err != nil {
			log.Logf(0, "failed to report crash to dashboard: %v", err)
		} else {
			// Don't store the crash locally if we've successfully
			// uploaded it to the dashboard. It would just eat disk space.
			return resp.NeedRepro
		}
	}

	sig := hash.Hash([]byte(crash.Title))
	id := sig.String()
	dir := filepath.Join(mgr.crashdir, id)
	osutil.MkdirAll(dir)
	if err := osutil.WriteFile(filepath.Join(dir, "description"), []byte(crash.Title+"\n")); err != nil {
		log.Logf(0, "failed to write crash: %v", err)
	}
	// Save up to 100 reports. If we already have 100, overwrite the oldest one.
	// Newer reports are generally more useful. Overwriting is also needed
	// to be able to understand if a particular bug still happens or is already fixed.
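	// A missing logN file marks a free slot; if even log0 is missing, this is the
	// first time we see this crash type locally, so an email notification is sent too.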
	oldestI := 0
	var oldestTime time.Time
	for i := 0; i < 100; i++ {
		info, err := os.Stat(filepath.Join(dir, fmt.Sprintf("log%v", i)))
		if err != nil {
			oldestI = i
			if i == 0 {
				go mgr.emailCrash(crash)
			}
			break
		}
		if oldestTime.IsZero() || info.ModTime().Before(oldestTime) {
			oldestI = i
			oldestTime = info.ModTime()
		}
	}
	osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("log%v", oldestI)), crash.Output)
	if len(mgr.cfg.Tag) > 0 {
		osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("tag%v", oldestI)), []byte(mgr.cfg.Tag))
	}
	if len(crash.Report.Report) > 0 {
		osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("report%v", oldestI)), crash.Report.Report)
	}

	return mgr.needLocalRepro(crash)
}

const maxReproAttempts = 3

func (mgr *Manager) needLocalRepro(crash *Crash) bool {
	if !mgr.cfg.Reproduce || crash.Corrupted {
		return false
	}
	sig := hash.Hash([]byte(crash.Title))
	dir := filepath.Join(mgr.crashdir, sig.String())
	if osutil.IsExist(filepath.Join(dir, "repro.prog")) {
		return false
	}
	for i := 0; i < maxReproAttempts; i++ {
		if !osutil.IsExist(filepath.Join(dir, fmt.Sprintf("repro%v", i))) {
			return true
		}
	}
	return false
}

func (mgr *Manager) needRepro(crash *Crash) bool {
	if crash.hub {
		return true
	}
	if mgr.dash == nil {
		return mgr.needLocalRepro(crash)
	}
	cid := &dashapi.CrashID{
		BuildID:   mgr.cfg.Tag,
		Title:     crash.Title,
		Corrupted: crash.Corrupted,
	}
	needRepro, err := mgr.dash.NeedRepro(cid)
	if err != nil {
		log.Logf(0, "dashboard.NeedRepro failed: %v", err)
	}
	return needRepro
}

func (mgr *Manager) saveFailedRepro(desc string, stats *repro.Stats) {
	if mgr.dash != nil {
		cid := &dashapi.CrashID{
			BuildID: mgr.cfg.Tag,
			Title:   desc,
		}
		if err := mgr.dash.ReportFailedRepro(cid); err != nil {
			log.Logf(0, "failed to report failed repro to dashboard: %v", err)
		} else {
			return
		}
	}
	dir := filepath.Join(mgr.crashdir, hash.String([]byte(desc)))
	osutil.MkdirAll(dir)
	for i := 0; i < maxReproAttempts; i++ {
		name := filepath.Join(dir, fmt.Sprintf("repro%v", i))
		if !osutil.IsExist(name) {
			saveReproStats(name, stats)
			break
		}
	}
}

func (mgr *Manager) saveRepro(res *repro.Result, stats *repro.Stats, hub bool) {
	rep := res.Report
	if err := mgr.reporter.Symbolize(rep); err != nil {
		log.Logf(0, "failed to symbolize repro: %v", err)
	}
	opts := fmt.Sprintf("# %+v\n", res.Opts)
	prog := res.Prog.Serialize()

	// Append this repro to the repro list to send to hub if it didn't come from hub originally.
	if !hub {
		progForHub := []byte(fmt.Sprintf("# %+v\n# %v\n# %v\n%s",
			res.Opts, res.Report.Title, mgr.cfg.Tag, prog))
		mgr.mu.Lock()
		mgr.newRepros = append(mgr.newRepros, progForHub)
		mgr.mu.Unlock()
	}

	var cprogText []byte
	if res.CRepro {
		cprog, err := csource.Write(res.Prog, res.Opts)
		if err == nil {
			formatted, err := csource.Format(cprog)
			if err == nil {
				cprog = formatted
			}
			cprogText = cprog
		} else {
			log.Logf(0, "failed to write C source: %v", err)
		}
	}

	if mgr.dash != nil {
		// Note: we intentionally don't set Corrupted for reproducers:
		// 1. This is reproducible, so it can be debugged even with a corrupted report.
		// 2. The repro was retried 3 times and still got a corrupted report at the end,
		// so maybe corrupted report detection is broken.
		// 3. Reproduction is expensive, so it's good to persist the result.
		dc := &dashapi.Crash{
			BuildID:     mgr.cfg.Tag,
			Title:       res.Report.Title,
			Maintainers: res.Report.Maintainers,
			Log:         res.Report.Output,
			Report:      res.Report.Report,
			ReproOpts:   res.Opts.Serialize(),
			ReproSyz:    res.Prog.Serialize(),
			ReproC:      cprogText,
		}
		if _, err := mgr.dash.ReportCrash(dc); err != nil {
			log.Logf(0, "failed to report repro to dashboard: %v", err)
		} else {
			// Don't store the crash locally if we've successfully
			// uploaded it to the dashboard. It would just eat disk space.
			return
		}
	}

	dir := filepath.Join(mgr.crashdir, hash.String([]byte(rep.Title)))
	osutil.MkdirAll(dir)

	if err := osutil.WriteFile(filepath.Join(dir, "description"), []byte(rep.Title+"\n")); err != nil {
		log.Logf(0, "failed to write crash: %v", err)
	}
	osutil.WriteFile(filepath.Join(dir, "repro.prog"), append([]byte(opts), prog...))
	if len(mgr.cfg.Tag) > 0 {
		osutil.WriteFile(filepath.Join(dir, "repro.tag"), []byte(mgr.cfg.Tag))
	}
	if len(rep.Output) > 0 {
		osutil.WriteFile(filepath.Join(dir, "repro.log"), rep.Output)
	}
	if len(rep.Report) > 0 {
		osutil.WriteFile(filepath.Join(dir, "repro.report"), rep.Report)
	}
	if len(cprogText) > 0 {
		osutil.WriteFile(filepath.Join(dir, "repro.cprog"), cprogText)
	}
	saveReproStats(filepath.Join(dir, "repro.stats"), stats)
}

func saveReproStats(filename string, stats *repro.Stats) {
	text := ""
	if stats != nil {
		text = fmt.Sprintf("Extracting prog: %v\nMinimizing prog: %v\n"+
			"Simplifying prog options: %v\nExtracting C: %v\nSimplifying C: %v\n\n\n%s",
			stats.ExtractProgTime, stats.MinimizeProgTime,
			stats.SimplifyProgTime, stats.ExtractCTime, stats.SimplifyCTime, stats.Log)
	}
	osutil.WriteFile(filename, []byte(text))
}

func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	mgr.minimizeCorpus()
	corpus = make([][]byte, 0, len(mgr.corpus))
	for _, inp := range mgr.corpus {
		corpus = append(corpus, inp.Prog)
	}
	repros = mgr.newRepros
	mgr.newRepros = nil
	return
}

func (mgr *Manager) addNewCandidates(progs [][]byte) {
	candidates := make([]rpctype.RPCCandidate, len(progs))
	for i, inp := range progs {
		candidates[i] = rpctype.RPCCandidate{
			Prog:      inp,
			Minimized: false, // don't trust programs from hub
			Smashed:   false,
		}
	}
	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	mgr.candidates = append(mgr.candidates, candidates...)
	if mgr.phase == phaseTriagedCorpus {
		mgr.phase = phaseQueriedHub
	}
}

func (mgr *Manager) minimizeCorpus() {
	if mgr.phase < phaseLoadedCorpus {
		return
	}
	inputs := make([]signal.Context, 0, len(mgr.corpus))
	for _, inp := range mgr.corpus {
		inputs = append(inputs, signal.Context{
			Signal:  inp.Signal.Deserialize(),
			Context: inp,
		})
	}
	newCorpus := make(map[string]rpctype.RPCInput)
	for _, ctx := range signal.Minimize(inputs) {
		inp := ctx.(rpctype.RPCInput)
		newCorpus[hash.String(inp.Prog)] = inp
	}
	log.Logf(1, "minimized corpus: %v -> %v", len(mgr.corpus), len(newCorpus))
	mgr.corpus = newCorpus

	// Don't minimize persistent corpus until fuzzers have triaged all inputs from it.
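	// When we do minimize it, records kept only because they contain now-disabled
	// syscalls (disabledHashes) are preserved in the database.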
	if mgr.phase < phaseTriagedCorpus {
		return
	}
	for key := range mgr.corpusDB.Records {
		_, ok1 := mgr.corpus[key]
		_, ok2 := mgr.disabledHashes[key]
		if !ok1 && !ok2 {
			mgr.corpusDB.Delete(key)
		}
	}
	mgr.corpusDB.BumpVersion(currentDBVersion)
}

func (mgr *Manager) Connect(a *rpctype.ConnectArgs, r *rpctype.ConnectRes) error {
	log.Logf(1, "fuzzer %v connected", a.Name)
	mgr.stats.vmRestarts.inc()
	mgr.mu.Lock()
	defer mgr.mu.Unlock()

	f := &Fuzzer{
		name: a.Name,
	}
	mgr.fuzzers[a.Name] = f
	mgr.minimizeCorpus()
	f.newMaxSignal = mgr.maxSignal.Copy()
	f.inputs = make([]rpctype.RPCInput, 0, len(mgr.corpus))
	for _, inp := range mgr.corpus {
		f.inputs = append(f.inputs, inp)
	}
	r.EnabledCalls = mgr.enabledSyscalls
	r.CheckResult = mgr.checkResult
	r.GitRevision = sys.GitRevision
	r.TargetRevision = mgr.target.Revision
	return nil
}

func (mgr *Manager) Check(a *rpctype.CheckArgs, r *int) error {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()

	if mgr.checkResult != nil {
		return nil
	}
	if len(mgr.cfg.EnabledSyscalls) != 0 && len(a.DisabledCalls[mgr.cfg.Sandbox]) != 0 {
		disabled := make(map[string]string)
		for _, dc := range a.DisabledCalls[mgr.cfg.Sandbox] {
			disabled[mgr.target.Syscalls[dc.ID].Name] = dc.Reason
		}
		for _, id := range mgr.enabledSyscalls {
			name := mgr.target.Syscalls[id].Name
			if reason := disabled[name]; reason != "" {
				log.Logf(0, "disabling %v: %v", name, reason)
			}
		}
	}
	if a.Error != "" {
		log.Fatalf("machine check: %v", a.Error)
	}
	log.Logf(0, "machine check:")
	log.Logf(0, "%-24v: %v/%v", "syscalls",
		len(a.EnabledCalls[mgr.cfg.Sandbox]), len(mgr.target.Syscalls))
	for _, feat := range a.Features {
		log.Logf(0, "%-24v: %v", feat.Name, feat.Reason)
	}
	a.DisabledCalls = nil
	mgr.checkResult = a
	mgr.loadCorpus()
	mgr.firstConnect = time.Now()
	return nil
}

func (mgr *Manager) NewInput(a *rpctype.NewInputArgs, r *int) error {
	inputSignal := a.Signal.Deserialize()
	log.Logf(4, "new input from %v for syscall %v (signal=%v, cover=%v)",
		a.Name, a.Call, inputSignal.Len(), len(a.Cover))
	mgr.mu.Lock()
	defer mgr.mu.Unlock()

	f := mgr.fuzzers[a.Name]
	if f == nil {
		log.Fatalf("fuzzer %v is not connected", a.Name)
	}

	if _, err := mgr.target.Deserialize(a.RPCInput.Prog); err != nil {
		// This should not happen, but we see such cases episodically; the reason is unknown.
		log.Logf(0, "failed to deserialize program from fuzzer: %v\n%s", err, a.RPCInput.Prog)
		return nil
	}
	if mgr.corpusSignal.Diff(inputSignal).Empty() {
		return nil
	}
	mgr.stats.newInputs.inc()
	mgr.corpusSignal.Merge(inputSignal)
	mgr.corpusCover.Merge(a.Cover)
	sig := hash.String(a.RPCInput.Prog)
	if inp, ok := mgr.corpus[sig]; ok {
		// The input is already present, but possibly with different signal/coverage/call.
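		// Merge the new signal and coverage into the existing record rather than
		// overwriting it, so repeated reports only ever grow the corpus entry.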
		inputSignal.Merge(inp.Signal.Deserialize())
		inp.Signal = inputSignal.Serialize()
		var inputCover cover.Cover
		inputCover.Merge(inp.Cover)
		inputCover.Merge(a.RPCInput.Cover)
		inp.Cover = inputCover.Serialize()
		mgr.corpus[sig] = inp
	} else {
		mgr.corpus[sig] = a.RPCInput
		mgr.corpusDB.Save(sig, a.RPCInput.Prog, 0)
		if err := mgr.corpusDB.Flush(); err != nil {
			log.Logf(0, "failed to save corpus database: %v", err)
		}
		for _, f1 := range mgr.fuzzers {
			if f1 == f {
				continue
			}
			inp := a.RPCInput
			inp.Cover = nil // Don't send coverage back to all fuzzers.
			f1.inputs = append(f1.inputs, inp)
		}
	}
	return nil
}

func (mgr *Manager) Poll(a *rpctype.PollArgs, r *rpctype.PollRes) error {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()

	for k, v := range a.Stats {
		switch k {
		case "exec total":
			mgr.stats.execTotal.add(int(v))
		default:
			mgr.fuzzerStats[k] += v
		}
	}

	f := mgr.fuzzers[a.Name]
	if f == nil {
		log.Fatalf("fuzzer %v is not connected", a.Name)
	}
	newMaxSignal := mgr.maxSignal.Diff(a.MaxSignal.Deserialize())
	if !newMaxSignal.Empty() {
		mgr.maxSignal.Merge(newMaxSignal)
		for _, f1 := range mgr.fuzzers {
			if f1 == f {
				continue
			}
			f1.newMaxSignal.Merge(newMaxSignal)
		}
	}
	r.MaxSignal = f.newMaxSignal.Split(500).Serialize()
	maxInputs := 5
	if maxInputs < mgr.cfg.Procs {
		maxInputs = mgr.cfg.Procs
	}
	if a.NeedCandidates {
		for i := 0; i < maxInputs && len(mgr.candidates) > 0; i++ {
			last := len(mgr.candidates) - 1
			r.Candidates = append(r.Candidates, mgr.candidates[last])
			mgr.candidates[last] = rpctype.RPCCandidate{}
			mgr.candidates = mgr.candidates[:last]
		}
	}
	if len(r.Candidates) == 0 {
		for i := 0; i < maxInputs && len(f.inputs) > 0; i++ {
			last := len(f.inputs) - 1
			r.NewInputs = append(r.NewInputs, f.inputs[last])
			f.inputs[last] = rpctype.RPCInput{}
			f.inputs = f.inputs[:last]
		}
		if len(f.inputs) == 0 {
			f.inputs = nil
		}
	}
	if len(mgr.candidates) == 0 {
		mgr.candidates = nil
		if mgr.phase == phaseLoadedCorpus {
			if mgr.cfg.HubClient != "" {
				mgr.phase = phaseTriagedCorpus
				go mgr.hubSyncLoop()
			} else {
				mgr.phase = phaseTriagedHub
			}
		} else if mgr.phase == phaseQueriedHub {
			mgr.phase = phaseTriagedHub
		}
	}
	log.Logf(4, "poll from %v: candidates=%v inputs=%v maxsignal=%v",
		a.Name, len(r.Candidates), len(r.NewInputs), len(r.MaxSignal.Elems))
	return nil
}

func (mgr *Manager) collectUsedFiles() {
	if mgr.vmPool == nil {
		return
	}
	addUsedFile := func(f string) {
		if f == "" {
			return
		}
		stat, err := os.Stat(f)
		if err != nil {
			log.Fatalf("failed to stat %v: %v", f, err)
		}
		mgr.usedFiles[f] = stat.ModTime()
	}
	cfg := mgr.cfg
	addUsedFile(cfg.SyzFuzzerBin)
	addUsedFile(cfg.SyzExecprogBin)
	addUsedFile(cfg.SyzExecutorBin)
	addUsedFile(cfg.SSHKey)
	if vmlinux := filepath.Join(cfg.KernelObj, "vmlinux"); osutil.IsExist(vmlinux) {
		addUsedFile(vmlinux)
	}
	if zircon := filepath.Join(cfg.KernelObj, "zircon.elf"); osutil.IsExist(zircon) {
		addUsedFile(zircon)
	}
	if cfg.Image != "9p" {
		addUsedFile(cfg.Image)
	}
}

func (mgr *Manager) checkUsedFiles() {
	for f, mod := range mgr.usedFiles {
		stat, err := os.Stat(f)
		if err != nil {
			log.Fatalf("failed to stat %v: %v", f, err)
		}
		if mod != stat.ModTime() {
			log.Fatalf("file %v that syz-manager uses has been modified by an external program\n"+
				"this can lead to arbitrary syz-manager misbehavior\n"+
				"modification time has changed: %v -> %v\n"+
				"don't modify files that syz-manager uses. exiting to prevent harm",
				f, mod, stat.ModTime())
		}
	}
}

func (mgr *Manager) dashboardReporter() {
	webAddr := publicWebAddr(mgr.cfg.HTTP)
	var lastFuzzingTime time.Duration
	var lastCrashes, lastExecs uint64
	for {
		time.Sleep(time.Minute)
		mgr.mu.Lock()
		if mgr.firstConnect.IsZero() {
			mgr.mu.Unlock()
			continue
		}
		crashes := mgr.stats.crashes.get()
		execs := mgr.stats.execTotal.get()
		req := &dashapi.ManagerStatsReq{
			Name:        mgr.cfg.Name,
			Addr:        webAddr,
			UpTime:      time.Since(mgr.firstConnect),
			Corpus:      uint64(len(mgr.corpus)),
			Cover:       uint64(mgr.corpusSignal.Len()),
			FuzzingTime: mgr.fuzzingTime - lastFuzzingTime,
			Crashes:     crashes - lastCrashes,
			Execs:       execs - lastExecs,
		}
		mgr.mu.Unlock()

		if err := mgr.dash.UploadManagerStats(req); err != nil {
			log.Logf(0, "failed to upload dashboard stats: %v", err)
			continue
		}
		mgr.mu.Lock()
		lastFuzzingTime += req.FuzzingTime
		lastCrashes += req.Crashes
		lastExecs += req.Execs
		mgr.mu.Unlock()
	}
}

func publicWebAddr(addr string) string {
	_, port, err := net.SplitHostPort(addr)
	if err == nil && port != "" {
		if host, err := os.Hostname(); err == nil {
			addr = net.JoinHostPort(host, port)
		}
		if GCE, err := gce.NewContext(); err == nil {
			addr = net.JoinHostPort(GCE.ExternalIP, port)
		}
	}
	return "http://" + addr
}