• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2015 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4// Package vm provides an abstract test machine (VM, physical machine, etc)
5// interface for the rest of the system.
6// For convenience test machines are subsequently collectively called VMs.
7// Package wraps vmimpl package interface with some common functionality
8// and higher-level interface.
9package vm
10
11import (
12	"bytes"
13	"fmt"
14	"os"
15	"time"
16
17	"github.com/google/syzkaller/pkg/mgrconfig"
18	"github.com/google/syzkaller/pkg/osutil"
19	"github.com/google/syzkaller/pkg/report"
20	"github.com/google/syzkaller/vm/vmimpl"
21
22	// Import all VM implementations, so that users only need to import vm.
23	_ "github.com/google/syzkaller/vm/adb"
24	_ "github.com/google/syzkaller/vm/gce"
25	_ "github.com/google/syzkaller/vm/gvisor"
26	_ "github.com/google/syzkaller/vm/isolated"
27	_ "github.com/google/syzkaller/vm/kvm"
28	_ "github.com/google/syzkaller/vm/odroid"
29	_ "github.com/google/syzkaller/vm/qemu"
30)
31
32type Pool struct {
33	impl    vmimpl.Pool
34	workdir string
35}
36
37type Instance struct {
38	impl    vmimpl.Instance
39	workdir string
40	index   int
41}
42
43var (
44	Shutdown   = vmimpl.Shutdown
45	ErrTimeout = vmimpl.ErrTimeout
46)
47
48type BootErrorer interface {
49	BootError() (string, []byte)
50}
51
52func Create(cfg *mgrconfig.Config, debug bool) (*Pool, error) {
53	env := &vmimpl.Env{
54		Name:    cfg.Name,
55		OS:      cfg.TargetOS,
56		Arch:    cfg.TargetVMArch,
57		Workdir: cfg.Workdir,
58		Image:   cfg.Image,
59		SSHKey:  cfg.SSHKey,
60		SSHUser: cfg.SSHUser,
61		Debug:   debug,
62		Config:  cfg.VM,
63	}
64	impl, err := vmimpl.Create(cfg.Type, env)
65	if err != nil {
66		return nil, err
67	}
68	return &Pool{
69		impl:    impl,
70		workdir: env.Workdir,
71	}, nil
72}
73
74func (pool *Pool) Count() int {
75	return pool.impl.Count()
76}
77
78func (pool *Pool) Create(index int) (*Instance, error) {
79	if index < 0 || index >= pool.Count() {
80		return nil, fmt.Errorf("invalid VM index %v (count %v)", index, pool.Count())
81	}
82	workdir, err := osutil.ProcessTempDir(pool.workdir)
83	if err != nil {
84		return nil, fmt.Errorf("failed to create instance temp dir: %v", err)
85	}
86	impl, err := pool.impl.Create(workdir, index)
87	if err != nil {
88		os.RemoveAll(workdir)
89		return nil, err
90	}
91	return &Instance{
92		impl:    impl,
93		workdir: workdir,
94		index:   index,
95	}, nil
96}
97
98func (inst *Instance) Copy(hostSrc string) (string, error) {
99	return inst.impl.Copy(hostSrc)
100}
101
102func (inst *Instance) Forward(port int) (string, error) {
103	return inst.impl.Forward(port)
104}
105
106func (inst *Instance) Run(timeout time.Duration, stop <-chan bool, command string) (
107	outc <-chan []byte, errc <-chan error, err error) {
108	return inst.impl.Run(timeout, stop, command)
109}
110
111func (inst *Instance) Diagnose() bool {
112	return inst.impl.Diagnose()
113}
114
115func (inst *Instance) Close() {
116	inst.impl.Close()
117	os.RemoveAll(inst.workdir)
118}
119
120// MonitorExecution monitors execution of a program running inside of a VM.
121// It detects kernel oopses in output, lost connections, hangs, etc.
122// outc/errc is what vm.Instance.Run returns, reporter parses kernel output for oopses.
123// If canExit is false and the program exits, it is treated as an error.
124// Returns a non-symbolized crash report, or nil if no error happens.
125func (inst *Instance) MonitorExecution(outc <-chan []byte, errc <-chan error,
126	reporter report.Reporter, canExit bool) (rep *report.Report) {
127	mon := &monitor{
128		inst:     inst,
129		outc:     outc,
130		errc:     errc,
131		reporter: reporter,
132		canExit:  canExit,
133	}
134	lastExecuteTime := time.Now()
135	ticker := time.NewTicker(10 * time.Second)
136	defer ticker.Stop()
137	for {
138		select {
139		case err := <-errc:
140			switch err {
141			case nil:
142				// The program has exited without errors,
143				// but wait for kernel output in case there is some delayed oops.
144				return mon.extractError("")
145			case ErrTimeout:
146				return nil
147			default:
148				// Note: connection lost can race with a kernel oops message.
149				// In such case we want to return the kernel oops.
150				return mon.extractError("lost connection to test machine")
151			}
152		case out := <-outc:
153			lastPos := len(mon.output)
154			mon.output = append(mon.output, out...)
155			if bytes.Contains(mon.output[lastPos:], executingProgram1) ||
156				bytes.Contains(mon.output[lastPos:], executingProgram2) {
157				lastExecuteTime = time.Now()
158			}
159			if reporter.ContainsCrash(mon.output[mon.matchPos:]) {
160				return mon.extractError("unknown error")
161			}
162			if len(mon.output) > 2*beforeContext {
163				copy(mon.output, mon.output[len(mon.output)-beforeContext:])
164				mon.output = mon.output[:beforeContext]
165			}
166			mon.matchPos = len(mon.output) - maxErrorLength
167			if mon.matchPos < 0 {
168				mon.matchPos = 0
169			}
170		case <-ticker.C:
171			// Detect both "not output whatsoever" and "kernel episodically prints
172			// something to console, but fuzzer is not actually executing programs".
173			// The timeout used to be 3 mins for a long time.
174			// But (1) we were seeing flakes on linux where net namespace
175			// destruction can be really slow, and (2) gVisor watchdog timeout
176			// is 3 mins + 1/4 of that for checking period = 3m45s.
177			// Current linux max timeout is CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=140
178			// and workqueue.watchdog_thresh=140 which both actually result
179			// in 140-280s detection delay.
180			// So the current timeout is 5 mins (300s).
181			// We don't want it to be too long too because it will waste time on real hangs.
182			if time.Since(lastExecuteTime) < 5*time.Minute {
183				break
184			}
185			if inst.Diagnose() {
186				mon.waitForOutput()
187			}
188			rep := &report.Report{
189				Title:      "no output from test machine",
190				Output:     mon.output,
191				Suppressed: report.IsSuppressed(mon.reporter, mon.output),
192			}
193			return rep
194		case <-Shutdown:
195			return nil
196		}
197	}
198}
199
200type monitor struct {
201	inst     *Instance
202	outc     <-chan []byte
203	errc     <-chan error
204	reporter report.Reporter
205	canExit  bool
206	output   []byte
207	matchPos int
208}
209
210func (mon *monitor) extractError(defaultError string) *report.Report {
211	// Give it some time to finish writing the error message.
212	mon.inst.Diagnose()
213	mon.waitForOutput()
214	if bytes.Contains(mon.output, []byte("SYZ-FUZZER: PREEMPTED")) {
215		return nil
216	}
217	if !mon.reporter.ContainsCrash(mon.output[mon.matchPos:]) {
218		if defaultError == "" {
219			if mon.canExit {
220				return nil
221			}
222			defaultError = "lost connection to test machine"
223		}
224		rep := &report.Report{
225			Title:      defaultError,
226			Output:     mon.output,
227			Suppressed: report.IsSuppressed(mon.reporter, mon.output),
228		}
229		return rep
230	}
231	rep := mon.reporter.Parse(mon.output[mon.matchPos:])
232	if rep == nil {
233		panic(fmt.Sprintf("reporter.ContainsCrash/Parse disagree:\n%s", mon.output[mon.matchPos:]))
234	}
235	start := mon.matchPos + rep.StartPos - beforeContext
236	if start < 0 {
237		start = 0
238	}
239	end := mon.matchPos + rep.EndPos + afterContext
240	if end > len(mon.output) {
241		end = len(mon.output)
242	}
243	rep.Output = mon.output[start:end]
244	rep.StartPos += mon.matchPos - start
245	rep.EndPos += mon.matchPos - start
246	return rep
247}
248
249func (mon *monitor) waitForOutput() {
250	timer := time.NewTimer(10 * time.Second)
251	for {
252		select {
253		case out, ok := <-mon.outc:
254			if !ok {
255				timer.Stop()
256				return
257			}
258			mon.output = append(mon.output, out...)
259		case <-timer.C:
260			return
261		}
262	}
263}
264
265const (
266	beforeContext  = 1024 << 10
267	afterContext   = 128 << 10
268	maxErrorLength = 512
269)
270
271var (
272	executingProgram1 = []byte("executing program")  // syz-fuzzer output
273	executingProgram2 = []byte("executed programs:") // syz-execprog output
274)
275