• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2015 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4package main
5
6import (
7	"bytes"
8	"encoding/json"
9	"flag"
10	"fmt"
11	"math/rand"
12	"net"
13	"os"
14	"os/exec"
15	"path/filepath"
16	"sync"
17	"sync/atomic"
18	"time"
19
20	"github.com/google/syzkaller/dashboard/dashapi"
21	"github.com/google/syzkaller/pkg/cover"
22	"github.com/google/syzkaller/pkg/csource"
23	"github.com/google/syzkaller/pkg/db"
24	"github.com/google/syzkaller/pkg/gce"
25	"github.com/google/syzkaller/pkg/hash"
26	"github.com/google/syzkaller/pkg/instance"
27	"github.com/google/syzkaller/pkg/log"
28	"github.com/google/syzkaller/pkg/mgrconfig"
29	"github.com/google/syzkaller/pkg/osutil"
30	"github.com/google/syzkaller/pkg/report"
31	"github.com/google/syzkaller/pkg/repro"
32	"github.com/google/syzkaller/pkg/rpctype"
33	"github.com/google/syzkaller/pkg/signal"
34	"github.com/google/syzkaller/prog"
35	"github.com/google/syzkaller/sys"
36	"github.com/google/syzkaller/vm"
37)
38
// Command-line flags. -config is the only required argument;
// -debug and -bench exist for development and diagnostics.
var (
	flagConfig = flag.String("config", "", "configuration file")
	flagDebug  = flag.Bool("debug", false, "dump all VM output to console")
	flagBench  = flag.String("bench", "", "write execution statistics into this file periodically")
)
44
// Manager is the central syz-manager state: it owns the VM pool, the
// persistent corpus database, crash storage, and the RPC-visible state
// shared with connected syz-fuzzer processes.
type Manager struct {
	cfg            *mgrconfig.Config
	vmPool         *vm.Pool // nil when cfg.Type == "none" (VMs are started manually)
	target         *prog.Target
	reporter       report.Reporter
	crashdir       string // cfg.Workdir/crashes
	port           int    // local TCP port of the fuzzer RPC server
	corpusDB       *db.DB // persistent corpus (cfg.Workdir/corpus.db)
	startTime      time.Time
	firstConnect   time.Time // zero until Check() succeeds
	fuzzingTime    time.Duration
	stats          *Stats
	fuzzerStats    map[string]uint64
	crashTypes     map[string]bool // crash titles seen so far (for crashTypes stat)
	vmStop         chan bool       // a send asks one fuzzing VM to stop (frees instances for repro)
	checkResult    *rpctype.CheckArgs
	fresh          bool   // true if the corpus DB was empty on start
	numFuzzing     uint32 // accessed atomically
	numReproducing uint32 // accessed atomically

	dash *dashapi.Dashboard // nil when no dashboard is configured

	mu              sync.Mutex // guards the mutable state below (and fuzzingTime/firstConnect above)
	phase           int        // one of the phase* constants; only moves forward
	enabledSyscalls []int

	candidates     []rpctype.RPCCandidate // untriaged inputs from corpus and hub
	disabledHashes map[string]struct{}    // corpus DB keys kept but not executed (disabled syscalls)
	corpus         map[string]rpctype.RPCInput
	corpusCover    cover.Cover
	corpusSignal   signal.Signal
	maxSignal      signal.Signal
	prios          [][]float32
	newRepros      [][]byte // repros not yet sent to hub

	fuzzers        map[string]*Fuzzer
	needMoreRepros chan chan bool
	hubReproQueue  chan *Crash
	reproRequest   chan chan map[string]bool

	// For checking that files that we are using are not changing under us.
	// Maps file name to modification time.
	usedFiles map[string]time.Time
}
89
// Manager lifecycle phases; the phase value only increases over time.
const (
	// Just started, nothing done yet.
	phaseInit = iota
	// Corpus is loaded and machine is checked.
	phaseLoadedCorpus
	// Triaged all inputs from corpus.
	// This is when we start querying hub and minimizing persistent corpus.
	phaseTriagedCorpus
	// Done the first request to hub.
	phaseQueriedHub
	// Triaged all new inputs from hub.
	// This is when we start reproducing crashes.
	phaseTriagedHub
)

// currentDBVersion is stamped into the corpus database; loadCorpus
// selectively re-minimizes/re-smashes programs on version bumps.
const currentDBVersion = 3
106
// Fuzzer is the manager-side state of one connected syz-fuzzer process.
type Fuzzer struct {
	name         string
	inputs       []rpctype.RPCInput // new corpus inputs pending delivery to this fuzzer
	newMaxSignal signal.Signal      // max-signal delta pending delivery to this fuzzer
}

// Crash describes one crash detected in a VM (or derived from a hub repro).
type Crash struct {
	vmIndex int
	hub     bool // this crash was created based on a repro from hub
	*report.Report
}
118
119func main() {
120	if sys.GitRevision == "" {
121		log.Fatalf("Bad syz-manager build. Build with make, run bin/syz-manager.")
122	}
123	flag.Parse()
124	log.EnableLogCaching(1000, 1<<20)
125	cfg, err := mgrconfig.LoadFile(*flagConfig)
126	if err != nil {
127		log.Fatalf("%v", err)
128	}
129	target, err := prog.GetTarget(cfg.TargetOS, cfg.TargetArch)
130	if err != nil {
131		log.Fatalf("%v", err)
132	}
133	syscalls, err := mgrconfig.ParseEnabledSyscalls(target, cfg.EnabledSyscalls, cfg.DisabledSyscalls)
134	if err != nil {
135		log.Fatalf("%v", err)
136	}
137	RunManager(cfg, target, syscalls)
138}
139
140func RunManager(cfg *mgrconfig.Config, target *prog.Target, syscalls map[int]bool) {
141	var vmPool *vm.Pool
142	// Type "none" is a special case for debugging/development when manager
143	// does not start any VMs, but instead you start them manually
144	// and start syz-fuzzer there.
145	if cfg.Type != "none" {
146		var err error
147		vmPool, err = vm.Create(cfg, *flagDebug)
148		if err != nil {
149			log.Fatalf("%v", err)
150		}
151	}
152
153	crashdir := filepath.Join(cfg.Workdir, "crashes")
154	osutil.MkdirAll(crashdir)
155
156	var enabledSyscalls []int
157	for c := range syscalls {
158		enabledSyscalls = append(enabledSyscalls, c)
159	}
160
161	reporter, err := report.NewReporter(cfg)
162	if err != nil {
163		log.Fatalf("%v", err)
164	}
165
166	mgr := &Manager{
167		cfg:             cfg,
168		vmPool:          vmPool,
169		target:          target,
170		reporter:        reporter,
171		crashdir:        crashdir,
172		startTime:       time.Now(),
173		stats:           new(Stats),
174		fuzzerStats:     make(map[string]uint64),
175		crashTypes:      make(map[string]bool),
176		enabledSyscalls: enabledSyscalls,
177		corpus:          make(map[string]rpctype.RPCInput),
178		disabledHashes:  make(map[string]struct{}),
179		fuzzers:         make(map[string]*Fuzzer),
180		fresh:           true,
181		vmStop:          make(chan bool),
182		hubReproQueue:   make(chan *Crash, 10),
183		needMoreRepros:  make(chan chan bool),
184		reproRequest:    make(chan chan map[string]bool),
185		usedFiles:       make(map[string]time.Time),
186	}
187
188	log.Logf(0, "loading corpus...")
189	mgr.corpusDB, err = db.Open(filepath.Join(cfg.Workdir, "corpus.db"))
190	if err != nil {
191		log.Fatalf("failed to open corpus database: %v", err)
192	}
193
194	// Create HTTP server.
195	mgr.initHTTP()
196	mgr.collectUsedFiles()
197
198	// Create RPC server for fuzzers.
199	s, err := rpctype.NewRPCServer(cfg.RPC, mgr)
200	if err != nil {
201		log.Fatalf("failed to create rpc server: %v", err)
202	}
203	log.Logf(0, "serving rpc on tcp://%v", s.Addr())
204	mgr.port = s.Addr().(*net.TCPAddr).Port
205	go s.Serve()
206
207	if cfg.DashboardAddr != "" {
208		mgr.dash = dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey)
209	}
210
211	go func() {
212		for lastTime := time.Now(); ; {
213			time.Sleep(10 * time.Second)
214			now := time.Now()
215			diff := now.Sub(lastTime)
216			lastTime = now
217			mgr.mu.Lock()
218			if mgr.firstConnect.IsZero() {
219				mgr.mu.Unlock()
220				continue
221			}
222			mgr.fuzzingTime += diff * time.Duration(atomic.LoadUint32(&mgr.numFuzzing))
223			executed := mgr.stats.execTotal.get()
224			crashes := mgr.stats.crashes.get()
225			signal := mgr.corpusSignal.Len()
226			mgr.mu.Unlock()
227			numReproducing := atomic.LoadUint32(&mgr.numReproducing)
228			numFuzzing := atomic.LoadUint32(&mgr.numFuzzing)
229
230			log.Logf(0, "VMs %v, executed %v, cover %v, crashes %v, repro %v",
231				numFuzzing, executed, signal, crashes, numReproducing)
232		}
233	}()
234
235	if *flagBench != "" {
236		f, err := os.OpenFile(*flagBench, os.O_WRONLY|os.O_CREATE|os.O_EXCL, osutil.DefaultFilePerm)
237		if err != nil {
238			log.Fatalf("failed to open bench file: %v", err)
239		}
240		go func() {
241			for {
242				time.Sleep(time.Minute)
243				vals := make(map[string]uint64)
244				mgr.mu.Lock()
245				if mgr.firstConnect.IsZero() {
246					mgr.mu.Unlock()
247					continue
248				}
249				mgr.minimizeCorpus()
250				vals["corpus"] = uint64(len(mgr.corpus))
251				vals["uptime"] = uint64(time.Since(mgr.firstConnect)) / 1e9
252				vals["fuzzing"] = uint64(mgr.fuzzingTime) / 1e9
253				vals["signal"] = uint64(mgr.corpusSignal.Len())
254				vals["coverage"] = uint64(len(mgr.corpusCover))
255				for k, v := range mgr.fuzzerStats {
256					vals[k] = v
257				}
258				mgr.mu.Unlock()
259				for k, v := range mgr.stats.all() {
260					vals[k] = v
261				}
262
263				data, err := json.MarshalIndent(vals, "", "  ")
264				if err != nil {
265					log.Fatalf("failed to serialize bench data")
266				}
267				if _, err := f.Write(append(data, '\n')); err != nil {
268					log.Fatalf("failed to write bench data")
269				}
270			}
271		}()
272	}
273
274	if mgr.dash != nil {
275		go mgr.dashboardReporter()
276	}
277
278	osutil.HandleInterrupts(vm.Shutdown)
279	if mgr.vmPool == nil {
280		log.Logf(0, "no VMs started (type=none)")
281		log.Logf(0, "you are supposed to start syz-fuzzer manually as:")
282		log.Logf(0, "syz-fuzzer -manager=manager.ip:%v [other flags as necessary]", mgr.port)
283		<-vm.Shutdown
284		return
285	}
286	mgr.vmLoop()
287}
288
// RunResult is the outcome of one runInstance invocation.
type RunResult struct {
	idx   int    // index of the VM instance that ran
	crash *Crash // non-nil if the run ended in a crash
	err   error
}

// ReproResult is the outcome of one reproduction attempt.
type ReproResult struct {
	instances []int  // VM indexes that were occupied by the repro
	title0    string // title of the original crash the repro was started for
	res       *repro.Result
	stats     *repro.Stats
	err       error
	hub       bool // repro came from hub
}
303
// Manager needs to be refactored (#605).
// nolint: gocyclo
// vmLoop is the main scheduler: it distributes available VM instances
// between regular fuzzing runs and crash reproduction, and drains results
// until shutdown is requested and every instance has returned.
func (mgr *Manager) vmLoop() {
	log.Logf(0, "booting test machines...")
	log.Logf(0, "wait for the connection from test machine...")
	// A repro occupies a group of instances; cap the group at the pool size.
	instancesPerRepro := 4
	vmCount := mgr.vmPool.Count()
	if instancesPerRepro > vmCount {
		instancesPerRepro = vmCount
	}
	// instances is a stack of currently idle VM indexes.
	instances := make([]int, vmCount)
	for i := range instances {
		instances[i] = vmCount - i - 1
	}
	runDone := make(chan *RunResult, 1)
	pendingRepro := make(map[*Crash]bool) // crashes waiting to enter the repro queue
	reproducing := make(map[string]bool)  // crash titles currently being reproduced
	reproInstances := 0                   // instances currently occupied by repros
	var reproQueue []*Crash
	reproDone := make(chan *ReproResult, 1)
	stopPending := false
	shutdown := vm.Shutdown
	for shutdown != nil || len(instances) != vmCount {
		mgr.mu.Lock()
		phase := mgr.phase
		mgr.mu.Unlock()

		// Promote pending crashes into the repro queue, skipping titles
		// already being reproduced or no longer worth reproducing.
		for crash := range pendingRepro {
			if reproducing[crash.Title] {
				continue
			}
			delete(pendingRepro, crash)
			if !mgr.needRepro(crash) {
				continue
			}
			log.Logf(1, "loop: add to repro queue '%v'", crash.Title)
			reproducing[crash.Title] = true
			reproQueue = append(reproQueue, crash)
		}

		log.Logf(1, "loop: phase=%v shutdown=%v instances=%v/%v %+v repro: pending=%v reproducing=%v queued=%v",
			phase, shutdown == nil, len(instances), vmCount, instances,
			len(pendingRepro), len(reproducing), len(reproQueue))

		// Repros start only after the hub is triaged and only when a full
		// instance group can be allocated.
		canRepro := func() bool {
			return phase >= phaseTriagedHub &&
				len(reproQueue) != 0 && reproInstances+instancesPerRepro <= vmCount
		}

		if shutdown != nil {
			// Start as many repros as queued crashes and idle instances allow.
			for canRepro() && len(instances) >= instancesPerRepro {
				last := len(reproQueue) - 1
				crash := reproQueue[last]
				reproQueue[last] = nil
				reproQueue = reproQueue[:last]
				vmIndexes := append([]int{}, instances[len(instances)-instancesPerRepro:]...)
				instances = instances[:len(instances)-instancesPerRepro]
				reproInstances += instancesPerRepro
				atomic.AddUint32(&mgr.numReproducing, 1)
				log.Logf(1, "loop: starting repro of '%v' on instances %+v", crash.Title, vmIndexes)
				go func() {
					res, stats, err := repro.Run(crash.Output, mgr.cfg, mgr.reporter, mgr.vmPool, vmIndexes)
					reproDone <- &ReproResult{vmIndexes, crash.Title, res, stats, err, crash.hub}
				}()
			}
			// Use the remaining idle instances for regular fuzzing.
			for !canRepro() && len(instances) != 0 {
				last := len(instances) - 1
				idx := instances[last]
				instances = instances[:last]
				log.Logf(1, "loop: starting instance %v", idx)
				go func() {
					crash, err := mgr.runInstance(idx)
					runDone <- &RunResult{idx, crash, err}
				}()
			}
		}

		// If a repro could run but lacks instances, ask one fuzzing VM to stop.
		var stopRequest chan bool
		if !stopPending && canRepro() {
			stopRequest = mgr.vmStop
		}

	wait:
		select {
		case stopRequest <- true:
			log.Logf(1, "loop: issued stop request")
			stopPending = true
		case res := <-runDone:
			log.Logf(1, "loop: instance %v finished, crash=%v", res.idx, res.crash != nil)
			if res.err != nil && shutdown != nil {
				log.Logf(0, "%v", res.err)
			}
			stopPending = false
			instances = append(instances, res.idx)
			// On shutdown qemu crashes with "qemu: terminating on signal 2",
			// which we detect as "lost connection". Don't save that as crash.
			if shutdown != nil && res.crash != nil {
				needRepro := mgr.saveCrash(res.crash)
				if needRepro {
					log.Logf(1, "loop: add pending repro for '%v'", res.crash.Title)
					pendingRepro[res.crash] = true
				}
			}
		case res := <-reproDone:
			atomic.AddUint32(&mgr.numReproducing, ^uint32(0)) // atomic decrement
			crepro := false
			title := ""
			if res.res != nil {
				crepro = res.res.CRepro
				title = res.res.Report.Title
			}
			log.Logf(1, "loop: repro on %+v finished '%v', repro=%v crepro=%v desc='%v'",
				res.instances, res.title0, res.res != nil, crepro, title)
			if res.err != nil {
				log.Logf(0, "repro failed: %v", res.err)
			}
			delete(reproducing, res.title0)
			instances = append(instances, res.instances...)
			reproInstances -= instancesPerRepro
			if res.res == nil {
				// Record failed attempts only for our own crashes, not hub's.
				if !res.hub {
					mgr.saveFailedRepro(res.title0, res.stats)
				}
			} else {
				mgr.saveRepro(res.res, res.stats, res.hub)
			}
		case <-shutdown:
			log.Logf(1, "loop: shutting down...")
			// A nil channel blocks forever, so this case fires only once.
			shutdown = nil
		case crash := <-mgr.hubReproQueue:
			log.Logf(1, "loop: get repro from hub")
			pendingRepro[crash] = true
		case reply := <-mgr.needMoreRepros:
			reply <- phase >= phaseTriagedHub &&
				len(reproQueue)+len(pendingRepro)+len(reproducing) == 0
			// Pure query: answer and keep waiting without rescheduling.
			goto wait
		case reply := <-mgr.reproRequest:
			repros := make(map[string]bool)
			for title := range reproducing {
				repros[title] = true
			}
			reply <- repros
			goto wait
		}
	}
}
450
// loadCorpus converts the persistent corpus database into triage candidates.
// Broken programs are deleted from the DB; programs containing disabled
// syscalls are kept in the DB but not executed. Must run in phaseInit;
// advances the phase to phaseLoadedCorpus.
func (mgr *Manager) loadCorpus() {
	// By default we don't re-minimize/re-smash programs from corpus,
	// it takes lots of time on start and is unnecessary.
	// However, on version bumps we can selectively re-minimize/re-smash.
	minimized, smashed := true, true
	switch mgr.corpusDB.Version {
	case 0:
		// Version 0 had broken minimization, so we need to re-minimize.
		minimized = false
		fallthrough
	case 1:
		// Version 1->2: memory is preallocated so lots of mmaps become unnecessary.
		minimized = false
		fallthrough
	case 2:
		// Version 2->3: big-endian hints.
		smashed = false
		fallthrough
	case currentDBVersion:
	}
	// Set of syscall IDs enabled for the configured sandbox.
	syscalls := make(map[int]bool)
	for _, id := range mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox] {
		syscalls[id] = true
	}
	deleted := 0
	for key, rec := range mgr.corpusDB.Records {
		p, err := mgr.target.Deserialize(rec.Val)
		if err != nil {
			// Log only the first few deletions to avoid flooding the console.
			if deleted < 10 {
				log.Logf(0, "deleting broken program: %v\n%s", err, rec.Val)
			}
			mgr.corpusDB.Delete(key)
			deleted++
			continue
		}
		disabled := false
		for _, c := range p.Calls {
			if !syscalls[c.Meta.ID] {
				disabled = true
				break
			}
		}
		if disabled {
			// This program contains a disabled syscall.
			// We won't execute it, but remember its hash so
			// it is not deleted during minimization.
			mgr.disabledHashes[hash.String(rec.Val)] = struct{}{}
			continue
		}
		mgr.candidates = append(mgr.candidates, rpctype.RPCCandidate{
			Prog:      rec.Val,
			Minimized: minimized,
			Smashed:   smashed,
		})
	}
	mgr.fresh = len(mgr.corpusDB.Records) == 0
	log.Logf(0, "%-24v: %v (%v deleted)", "corpus", len(mgr.candidates), deleted)

	// Now this is ugly.
	// We duplicate all inputs in the corpus and shuffle the second part.
	// This solves the following problem. A fuzzer can crash while triaging candidates,
	// in such case it will also lost all cached candidates. Or, the input can be somewhat flaky
	// and doesn't give the coverage on first try. So we give each input the second chance.
	// Shuffling should alleviate deterministically losing the same inputs on fuzzer crashing.
	mgr.candidates = append(mgr.candidates, mgr.candidates...)
	shuffle := mgr.candidates[len(mgr.candidates)/2:]
	for i := range shuffle {
		// Fisher-Yates shuffle of the duplicated half.
		j := i + rand.Intn(len(shuffle)-i)
		shuffle[i], shuffle[j] = shuffle[j], shuffle[i]
	}
	if mgr.phase != phaseInit {
		panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase))
	}
	mgr.phase = phaseLoadedCorpus
}
526
// runInstance boots VM index, copies the fuzzer and executor binaries into
// it, runs syz-fuzzer for up to an hour (or until a mgr.vmStop request) and
// monitors the output. Returns (nil, nil) for a clean run — the only "OK"
// outcome — or the detected crash.
func (mgr *Manager) runInstance(index int) (*Crash, error) {
	mgr.checkUsedFiles()
	inst, err := mgr.vmPool.Create(index)
	if err != nil {
		return nil, fmt.Errorf("failed to create instance: %v", err)
	}
	defer inst.Close()

	fwdAddr, err := inst.Forward(mgr.port)
	if err != nil {
		return nil, fmt.Errorf("failed to setup port forwarding: %v", err)
	}
	fuzzerBin, err := inst.Copy(mgr.cfg.SyzFuzzerBin)
	if err != nil {
		return nil, fmt.Errorf("failed to copy binary: %v", err)
	}
	executorBin, err := inst.Copy(mgr.cfg.SyzExecutorBin)
	if err != nil {
		return nil, fmt.Errorf("failed to copy binary: %v", err)
	}

	// In debug mode run a single verbose proc to keep the output readable.
	fuzzerV := 0
	procs := mgr.cfg.Procs
	if *flagDebug {
		fuzzerV = 100
		procs = 1
	}

	// Run the fuzzer binary.
	start := time.Now()
	atomic.AddUint32(&mgr.numFuzzing, 1)
	defer atomic.AddUint32(&mgr.numFuzzing, ^uint32(0)) // atomic decrement
	cmd := instance.FuzzerCmd(fuzzerBin, executorBin, fmt.Sprintf("vm-%v", index),
		mgr.cfg.TargetOS, mgr.cfg.TargetArch, fwdAddr, mgr.cfg.Sandbox, procs, fuzzerV,
		mgr.cfg.Cover, *flagDebug, false, false)
	outc, errc, err := inst.Run(time.Hour, mgr.vmStop, cmd)
	if err != nil {
		return nil, fmt.Errorf("failed to run fuzzer: %v", err)
	}

	rep := inst.MonitorExecution(outc, errc, mgr.reporter, false)
	if rep == nil {
		// This is the only "OK" outcome.
		log.Logf(0, "vm-%v: running for %v, restarting", index, time.Since(start))
		return nil, nil
	}
	crash := &Crash{
		vmIndex: index,
		hub:     false,
		Report:  rep,
	}
	return crash, nil
}
580
581func (mgr *Manager) emailCrash(crash *Crash) {
582	if len(mgr.cfg.EmailAddrs) == 0 {
583		return
584	}
585	args := []string{"-s", "syzkaller: " + crash.Title}
586	args = append(args, mgr.cfg.EmailAddrs...)
587	log.Logf(0, "sending email to %v", mgr.cfg.EmailAddrs)
588
589	cmd := exec.Command("mailx", args...)
590	cmd.Stdin = bytes.NewReader(crash.Report.Report)
591	if _, err := osutil.Run(10*time.Minute, cmd); err != nil {
592		log.Logf(0, "failed to send email: %v", err)
593	}
594}
595
// saveCrash records a crash: updates stats, reports it to the dashboard if
// one is configured, otherwise stores description/log/tag/report files under
// crashdir/<hash(title)>. Returns true if the crash should be reproduced.
func (mgr *Manager) saveCrash(crash *Crash) bool {
	if crash.Suppressed {
		// Counted but not stored or reported anywhere.
		log.Logf(0, "vm-%v: suppressed crash %v", crash.vmIndex, crash.Title)
		mgr.stats.crashSuppressed.inc()
		return false
	}
	corrupted := ""
	if crash.Corrupted {
		corrupted = " [corrupted]"
	}
	log.Logf(0, "vm-%v: crash: %v%v", crash.vmIndex, crash.Title, corrupted)
	if err := mgr.reporter.Symbolize(crash.Report); err != nil {
		log.Logf(0, "failed to symbolize report: %v", err)
	}

	mgr.stats.crashes.inc()
	mgr.mu.Lock()
	// Count each distinct crash title once.
	if !mgr.crashTypes[crash.Title] {
		mgr.crashTypes[crash.Title] = true
		mgr.stats.crashTypes.inc()
	}
	mgr.mu.Unlock()

	if mgr.dash != nil {
		dc := &dashapi.Crash{
			BuildID:     mgr.cfg.Tag,
			Title:       crash.Title,
			Corrupted:   crash.Corrupted,
			Maintainers: crash.Maintainers,
			Log:         crash.Output,
			Report:      crash.Report.Report,
		}
		resp, err := mgr.dash.ReportCrash(dc)
		if err != nil {
			log.Logf(0, "failed to report crash to dashboard: %v", err)
		} else {
			// Don't store the crash locally, if we've successfully
			// uploaded it to the dashboard. These will just eat disk space.
			return resp.NeedRepro
		}
	}

	// Local storage: one directory per crash title, keyed by title hash.
	sig := hash.Hash([]byte(crash.Title))
	id := sig.String()
	dir := filepath.Join(mgr.crashdir, id)
	osutil.MkdirAll(dir)
	if err := osutil.WriteFile(filepath.Join(dir, "description"), []byte(crash.Title+"\n")); err != nil {
		log.Logf(0, "failed to write crash: %v", err)
	}
	// Save up to 100 reports. If we already have 100, overwrite the oldest one.
	// Newer reports are generally more useful. Overwriting is also needed
	// to be able to understand if a particular bug still happens or already fixed.
	oldestI := 0
	var oldestTime time.Time
	for i := 0; i < 100; i++ {
		info, err := os.Stat(filepath.Join(dir, fmt.Sprintf("log%v", i)))
		if err != nil {
			// Free slot found; i == 0 means this is the first crash of this
			// type, so also notify by email.
			oldestI = i
			if i == 0 {
				go mgr.emailCrash(crash)
			}
			break
		}
		if oldestTime.IsZero() || info.ModTime().Before(oldestTime) {
			oldestI = i
			oldestTime = info.ModTime()
		}
	}
	osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("log%v", oldestI)), crash.Output)
	if len(mgr.cfg.Tag) > 0 {
		osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("tag%v", oldestI)), []byte(mgr.cfg.Tag))
	}
	if len(crash.Report.Report) > 0 {
		osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("report%v", oldestI)), crash.Report.Report)
	}

	return mgr.needLocalRepro(crash)
}
674
675const maxReproAttempts = 3
676
677func (mgr *Manager) needLocalRepro(crash *Crash) bool {
678	if !mgr.cfg.Reproduce || crash.Corrupted {
679		return false
680	}
681	sig := hash.Hash([]byte(crash.Title))
682	dir := filepath.Join(mgr.crashdir, sig.String())
683	if osutil.IsExist(filepath.Join(dir, "repro.prog")) {
684		return false
685	}
686	for i := 0; i < maxReproAttempts; i++ {
687		if !osutil.IsExist(filepath.Join(dir, fmt.Sprintf("repro%v", i))) {
688			return true
689		}
690	}
691	return false
692}
693
694func (mgr *Manager) needRepro(crash *Crash) bool {
695	if crash.hub {
696		return true
697	}
698	if mgr.dash == nil {
699		return mgr.needLocalRepro(crash)
700	}
701	cid := &dashapi.CrashID{
702		BuildID:   mgr.cfg.Tag,
703		Title:     crash.Title,
704		Corrupted: crash.Corrupted,
705	}
706	needRepro, err := mgr.dash.NeedRepro(cid)
707	if err != nil {
708		log.Logf(0, "dashboard.NeedRepro failed: %v", err)
709	}
710	return needRepro
711}
712
713func (mgr *Manager) saveFailedRepro(desc string, stats *repro.Stats) {
714	if mgr.dash != nil {
715		cid := &dashapi.CrashID{
716			BuildID: mgr.cfg.Tag,
717			Title:   desc,
718		}
719		if err := mgr.dash.ReportFailedRepro(cid); err != nil {
720			log.Logf(0, "failed to report failed repro to dashboard: %v", err)
721		} else {
722			return
723		}
724	}
725	dir := filepath.Join(mgr.crashdir, hash.String([]byte(desc)))
726	osutil.MkdirAll(dir)
727	for i := 0; i < maxReproAttempts; i++ {
728		name := filepath.Join(dir, fmt.Sprintf("repro%v", i))
729		if !osutil.IsExist(name) {
730			saveReproStats(name, stats)
731			break
732		}
733	}
734}
735
// saveRepro persists a successful reproduction result: queues it for the
// hub (unless it originated there), uploads it to the dashboard when
// configured, otherwise writes repro.{prog,tag,log,report,cprog,stats}
// files under crashdir/<hash(title)>.
func (mgr *Manager) saveRepro(res *repro.Result, stats *repro.Stats, hub bool) {
	rep := res.Report
	if err := mgr.reporter.Symbolize(rep); err != nil {
		log.Logf(0, "failed to symbolize repro: %v", err)
	}
	opts := fmt.Sprintf("# %+v\n", res.Opts)
	// NOTE(review): this local shadows the imported prog package below.
	prog := res.Prog.Serialize()

	// Append this repro to repro list to send to hub if it didn't come from hub originally.
	if !hub {
		progForHub := []byte(fmt.Sprintf("# %+v\n# %v\n# %v\n%s",
			res.Opts, res.Report.Title, mgr.cfg.Tag, prog))
		mgr.mu.Lock()
		mgr.newRepros = append(mgr.newRepros, progForHub)
		mgr.mu.Unlock()
	}

	// Generate C source for the reproducer when it works as a C program.
	var cprogText []byte
	if res.CRepro {
		cprog, err := csource.Write(res.Prog, res.Opts)
		if err == nil {
			// Formatting failure is non-fatal; keep the unformatted source.
			formatted, err := csource.Format(cprog)
			if err == nil {
				cprog = formatted
			}
			cprogText = cprog
		} else {
			log.Logf(0, "failed to write C source: %v", err)
		}
	}

	if mgr.dash != nil {
		// Note: we intentionally don't set Corrupted for reproducers:
		// 1. This is reproducible so can be debugged even with corrupted report.
		// 2. Repro re-tried 3 times and still got corrupted report at the end,
		//    so maybe corrupted report detection is broken.
		// 3. Reproduction is expensive so it's good to persist the result.
		dc := &dashapi.Crash{
			BuildID:     mgr.cfg.Tag,
			Title:       res.Report.Title,
			Maintainers: res.Report.Maintainers,
			Log:         res.Report.Output,
			Report:      res.Report.Report,
			ReproOpts:   res.Opts.Serialize(),
			ReproSyz:    res.Prog.Serialize(),
			ReproC:      cprogText,
		}
		if _, err := mgr.dash.ReportCrash(dc); err != nil {
			log.Logf(0, "failed to report repro to dashboard: %v", err)
		} else {
			// Don't store the crash locally, if we've successfully
			// uploaded it to the dashboard. These will just eat disk space.
			return
		}
	}

	dir := filepath.Join(mgr.crashdir, hash.String([]byte(rep.Title)))
	osutil.MkdirAll(dir)

	if err := osutil.WriteFile(filepath.Join(dir, "description"), []byte(rep.Title+"\n")); err != nil {
		log.Logf(0, "failed to write crash: %v", err)
	}
	osutil.WriteFile(filepath.Join(dir, "repro.prog"), append([]byte(opts), prog...))
	if len(mgr.cfg.Tag) > 0 {
		osutil.WriteFile(filepath.Join(dir, "repro.tag"), []byte(mgr.cfg.Tag))
	}
	if len(rep.Output) > 0 {
		osutil.WriteFile(filepath.Join(dir, "repro.log"), rep.Output)
	}
	if len(rep.Report) > 0 {
		osutil.WriteFile(filepath.Join(dir, "repro.report"), rep.Report)
	}
	if len(cprogText) > 0 {
		osutil.WriteFile(filepath.Join(dir, "repro.cprog"), cprogText)
	}
	saveReproStats(filepath.Join(dir, "repro.stats"), stats)
}
813
814func saveReproStats(filename string, stats *repro.Stats) {
815	text := ""
816	if stats != nil {
817		text = fmt.Sprintf("Extracting prog: %v\nMinimizing prog: %v\n"+
818			"Simplifying prog options: %v\nExtracting C: %v\nSimplifying C: %v\n\n\n%s",
819			stats.ExtractProgTime, stats.MinimizeProgTime,
820			stats.SimplifyProgTime, stats.ExtractCTime, stats.SimplifyCTime, stats.Log)
821	}
822	osutil.WriteFile(filename, []byte(text))
823}
824
825func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) {
826	mgr.mu.Lock()
827	defer mgr.mu.Unlock()
828	mgr.minimizeCorpus()
829	corpus = make([][]byte, 0, len(mgr.corpus))
830	for _, inp := range mgr.corpus {
831		corpus = append(corpus, inp.Prog)
832	}
833	repros = mgr.newRepros
834	mgr.newRepros = nil
835	return
836}
837
838func (mgr *Manager) addNewCandidates(progs [][]byte) {
839	candidates := make([]rpctype.RPCCandidate, len(progs))
840	for i, inp := range progs {
841		candidates[i] = rpctype.RPCCandidate{
842			Prog:      inp,
843			Minimized: false, // don't trust programs from hub
844			Smashed:   false,
845		}
846	}
847	mgr.mu.Lock()
848	defer mgr.mu.Unlock()
849	mgr.candidates = append(mgr.candidates, candidates...)
850	if mgr.phase == phaseTriagedCorpus {
851		mgr.phase = phaseQueriedHub
852	}
853}
854
// minimizeCorpus replaces mgr.corpus with a subset whose combined signal
// covers the whole corpus signal. After phaseTriagedCorpus it also
// garbage-collects dropped programs from the corpus DB (keeping programs
// with disabled syscalls) and bumps the DB version.
// All current callers hold mgr.mu.
func (mgr *Manager) minimizeCorpus() {
	if mgr.phase < phaseLoadedCorpus {
		return
	}
	inputs := make([]signal.Context, 0, len(mgr.corpus))
	for _, inp := range mgr.corpus {
		inputs = append(inputs, signal.Context{
			Signal:  inp.Signal.Deserialize(),
			Context: inp,
		})
	}
	newCorpus := make(map[string]rpctype.RPCInput)
	for _, ctx := range signal.Minimize(inputs) {
		inp := ctx.(rpctype.RPCInput)
		newCorpus[hash.String(inp.Prog)] = inp
	}
	log.Logf(1, "minimized corpus: %v -> %v", len(mgr.corpus), len(newCorpus))
	mgr.corpus = newCorpus

	// Don't minimize persistent corpus until fuzzers have triaged all inputs from it.
	if mgr.phase < phaseTriagedCorpus {
		return
	}
	for key := range mgr.corpusDB.Records {
		_, ok1 := mgr.corpus[key]
		_, ok2 := mgr.disabledHashes[key]
		if !ok1 && !ok2 {
			mgr.corpusDB.Delete(key)
		}
	}
	mgr.corpusDB.BumpVersion(currentDBVersion)
}
887
888func (mgr *Manager) Connect(a *rpctype.ConnectArgs, r *rpctype.ConnectRes) error {
889	log.Logf(1, "fuzzer %v connected", a.Name)
890	mgr.stats.vmRestarts.inc()
891	mgr.mu.Lock()
892	defer mgr.mu.Unlock()
893
894	f := &Fuzzer{
895		name: a.Name,
896	}
897	mgr.fuzzers[a.Name] = f
898	mgr.minimizeCorpus()
899	f.newMaxSignal = mgr.maxSignal.Copy()
900	f.inputs = make([]rpctype.RPCInput, 0, len(mgr.corpus))
901	for _, inp := range mgr.corpus {
902		f.inputs = append(f.inputs, inp)
903	}
904	r.EnabledCalls = mgr.enabledSyscalls
905	r.CheckResult = mgr.checkResult
906	r.GitRevision = sys.GitRevision
907	r.TargetRevision = mgr.target.Revision
908	return nil
909}
910
// Check is the RPC handler delivering the machine-check result from the
// first fuzzer. It logs enabled/disabled syscalls and features, stores the
// result and triggers corpus loading. Subsequent calls are no-ops.
func (mgr *Manager) Check(a *rpctype.CheckArgs, r *int) error {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()

	if mgr.checkResult != nil {
		// Another fuzzer already delivered the check result.
		return nil
	}
	// Report syscalls that the user enabled explicitly but the machine disabled.
	if len(mgr.cfg.EnabledSyscalls) != 0 && len(a.DisabledCalls[mgr.cfg.Sandbox]) != 0 {
		disabled := make(map[string]string)
		for _, dc := range a.DisabledCalls[mgr.cfg.Sandbox] {
			disabled[mgr.target.Syscalls[dc.ID].Name] = dc.Reason
		}
		for _, id := range mgr.enabledSyscalls {
			name := mgr.target.Syscalls[id].Name
			if reason := disabled[name]; reason != "" {
				log.Logf(0, "disabling %v: %v", name, reason)
			}
		}
	}
	if a.Error != "" {
		log.Fatalf("machine check: %v", a.Error)
	}
	log.Logf(0, "machine check:")
	log.Logf(0, "%-24v: %v/%v", "syscalls",
		len(a.EnabledCalls[mgr.cfg.Sandbox]), len(mgr.target.Syscalls))
	for _, feat := range a.Features {
		log.Logf(0, "%-24v: %v", feat.Name, feat.Reason)
	}
	// DisabledCalls are not needed anymore; drop them before storing.
	a.DisabledCalls = nil
	mgr.checkResult = a
	mgr.loadCorpus()
	mgr.firstConnect = time.Now()
	return nil
}
945
// NewInput is the RPC handler called by a fuzzer when a program produced
// new signal. It merges the input into the corpus/cover/signal, persists it
// to the corpus DB, and queues it for delivery to all other fuzzers.
func (mgr *Manager) NewInput(a *rpctype.NewInputArgs, r *int) error {
	inputSignal := a.Signal.Deserialize()
	log.Logf(4, "new input from %v for syscall %v (signal=%v, cover=%v)",
		a.Name, a.Call, inputSignal.Len(), len(a.Cover))
	mgr.mu.Lock()
	defer mgr.mu.Unlock()

	f := mgr.fuzzers[a.Name]
	if f == nil {
		log.Fatalf("fuzzer %v is not connected", a.Name)
	}

	if _, err := mgr.target.Deserialize(a.RPCInput.Prog); err != nil {
		// This should not happen, but we see such cases episodically, reason unknown.
		log.Logf(0, "failed to deserialize program from fuzzer: %v\n%s", err, a.RPCInput.Prog)
		return nil
	}
	if mgr.corpusSignal.Diff(inputSignal).Empty() {
		// The input brings no new signal; drop it.
		return nil
	}
	mgr.stats.newInputs.inc()
	mgr.corpusSignal.Merge(inputSignal)
	mgr.corpusCover.Merge(a.Cover)
	sig := hash.String(a.RPCInput.Prog)
	if inp, ok := mgr.corpus[sig]; ok {
		// The input is already present, but possibly with different signal/coverage/call.
		inputSignal.Merge(inp.Signal.Deserialize())
		inp.Signal = inputSignal.Serialize()
		var inputCover cover.Cover
		inputCover.Merge(inp.Cover)
		inputCover.Merge(a.RPCInput.Cover)
		inp.Cover = inputCover.Serialize()
		mgr.corpus[sig] = inp
	} else {
		mgr.corpus[sig] = a.RPCInput
		mgr.corpusDB.Save(sig, a.RPCInput.Prog, 0)
		if err := mgr.corpusDB.Flush(); err != nil {
			log.Logf(0, "failed to save corpus database: %v", err)
		}
		// Queue the new input for all other connected fuzzers.
		for _, f1 := range mgr.fuzzers {
			if f1 == f {
				continue
			}
			inp := a.RPCInput
			inp.Cover = nil // Don't send coverage back to all fuzzers.
			f1.inputs = append(f1.inputs, inp)
		}
	}
	return nil
}
996
// Poll is the RPC handler called periodically by each fuzzer. It merges the
// fuzzer's stats and max signal, hands back pending candidates (or, when
// none, new corpus inputs) plus new max signal, and advances the phase once
// all candidates have been handed out.
func (mgr *Manager) Poll(a *rpctype.PollArgs, r *rpctype.PollRes) error {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()

	for k, v := range a.Stats {
		switch k {
		case "exec total":
			mgr.stats.execTotal.add(int(v))
		default:
			mgr.fuzzerStats[k] += v
		}
	}

	f := mgr.fuzzers[a.Name]
	if f == nil {
		log.Fatalf("fuzzer %v is not connected", a.Name)
	}
	// Merge signal new to the manager and broadcast it to the other fuzzers.
	newMaxSignal := mgr.maxSignal.Diff(a.MaxSignal.Deserialize())
	if !newMaxSignal.Empty() {
		mgr.maxSignal.Merge(newMaxSignal)
		for _, f1 := range mgr.fuzzers {
			if f1 == f {
				continue
			}
			f1.newMaxSignal.Merge(newMaxSignal)
		}
	}
	// Send at most 500 max-signal elements per poll.
	r.MaxSignal = f.newMaxSignal.Split(500).Serialize()
	maxInputs := 5
	if maxInputs < mgr.cfg.Procs {
		maxInputs = mgr.cfg.Procs
	}
	if a.NeedCandidates {
		// Pop up to maxInputs candidates off the tail of the queue.
		for i := 0; i < maxInputs && len(mgr.candidates) > 0; i++ {
			last := len(mgr.candidates) - 1
			r.Candidates = append(r.Candidates, mgr.candidates[last])
			mgr.candidates[last] = rpctype.RPCCandidate{} // release for GC
			mgr.candidates = mgr.candidates[:last]
		}
	}
	if len(r.Candidates) == 0 {
		// No candidates to triage: deliver pending corpus inputs instead.
		for i := 0; i < maxInputs && len(f.inputs) > 0; i++ {
			last := len(f.inputs) - 1
			r.NewInputs = append(r.NewInputs, f.inputs[last])
			f.inputs[last] = rpctype.RPCInput{} // release for GC
			f.inputs = f.inputs[:last]
		}
		if len(f.inputs) == 0 {
			f.inputs = nil
		}
	}
	if len(mgr.candidates) == 0 {
		mgr.candidates = nil
		// All candidates handed out: advance the phase state machine.
		if mgr.phase == phaseLoadedCorpus {
			if mgr.cfg.HubClient != "" {
				mgr.phase = phaseTriagedCorpus
				go mgr.hubSyncLoop()
			} else {
				mgr.phase = phaseTriagedHub
			}
		} else if mgr.phase == phaseQueriedHub {
			mgr.phase = phaseTriagedHub
		}
	}
	log.Logf(4, "poll from %v: candidates=%v inputs=%v maxsignal=%v",
		a.Name, len(r.Candidates), len(r.NewInputs), len(r.MaxSignal.Elems))
	return nil
}
1065
1066func (mgr *Manager) collectUsedFiles() {
1067	if mgr.vmPool == nil {
1068		return
1069	}
1070	addUsedFile := func(f string) {
1071		if f == "" {
1072			return
1073		}
1074		stat, err := os.Stat(f)
1075		if err != nil {
1076			log.Fatalf("failed to stat %v: %v", f, err)
1077		}
1078		mgr.usedFiles[f] = stat.ModTime()
1079	}
1080	cfg := mgr.cfg
1081	addUsedFile(cfg.SyzFuzzerBin)
1082	addUsedFile(cfg.SyzExecprogBin)
1083	addUsedFile(cfg.SyzExecutorBin)
1084	addUsedFile(cfg.SSHKey)
1085	if vmlinux := filepath.Join(cfg.KernelObj, "vmlinux"); osutil.IsExist(vmlinux) {
1086		addUsedFile(vmlinux)
1087	}
1088	if zircon := filepath.Join(cfg.KernelObj, "zircon.elf"); osutil.IsExist(zircon) {
1089		addUsedFile(zircon)
1090	}
1091	if cfg.Image != "9p" {
1092		addUsedFile(cfg.Image)
1093	}
1094}
1095
1096func (mgr *Manager) checkUsedFiles() {
1097	for f, mod := range mgr.usedFiles {
1098		stat, err := os.Stat(f)
1099		if err != nil {
1100			log.Fatalf("failed to stat %v: %v", f, err)
1101		}
1102		if mod != stat.ModTime() {
1103			log.Fatalf("file %v that syz-manager uses has been modified by an external program\n"+
1104				"this can lead to arbitrary syz-manager misbehavior\n"+
1105				"modification time has changed: %v -> %v\n"+
1106				"don't modify files that syz-manager uses. exiting to prevent harm",
1107				f, mod, stat.ModTime())
1108		}
1109	}
1110}
1111
1112func (mgr *Manager) dashboardReporter() {
1113	webAddr := publicWebAddr(mgr.cfg.HTTP)
1114	var lastFuzzingTime time.Duration
1115	var lastCrashes, lastExecs uint64
1116	for {
1117		time.Sleep(time.Minute)
1118		mgr.mu.Lock()
1119		if mgr.firstConnect.IsZero() {
1120			mgr.mu.Unlock()
1121			continue
1122		}
1123		crashes := mgr.stats.crashes.get()
1124		execs := mgr.stats.execTotal.get()
1125		req := &dashapi.ManagerStatsReq{
1126			Name:        mgr.cfg.Name,
1127			Addr:        webAddr,
1128			UpTime:      time.Since(mgr.firstConnect),
1129			Corpus:      uint64(len(mgr.corpus)),
1130			Cover:       uint64(mgr.corpusSignal.Len()),
1131			FuzzingTime: mgr.fuzzingTime - lastFuzzingTime,
1132			Crashes:     crashes - lastCrashes,
1133			Execs:       execs - lastExecs,
1134		}
1135		mgr.mu.Unlock()
1136
1137		if err := mgr.dash.UploadManagerStats(req); err != nil {
1138			log.Logf(0, "faield to upload dashboard stats: %v", err)
1139			continue
1140		}
1141		mgr.mu.Lock()
1142		lastFuzzingTime += req.FuzzingTime
1143		lastCrashes += req.Crashes
1144		lastExecs += req.Execs
1145		mgr.mu.Unlock()
1146	}
1147}
1148
1149func publicWebAddr(addr string) string {
1150	_, port, err := net.SplitHostPort(addr)
1151	if err == nil && port != "" {
1152		if host, err := os.Hostname(); err == nil {
1153			addr = net.JoinHostPort(host, port)
1154		}
1155		if GCE, err := gce.NewContext(); err == nil {
1156			addr = net.JoinHostPort(GCE.ExternalIP, port)
1157		}
1158	}
1159	return "http://" + addr
1160}
1161