1// Copyright 2015 syzkaller project authors. All rights reserved. 2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4// Package vm provides an abstract test machine (VM, physical machine, etc) 5// interface for the rest of the system. 6// For convenience test machines are subsequently collectively called VMs. 7// Package wraps vmimpl package interface with some common functionality 8// and higher-level interface. 9package vm 10 11import ( 12 "bytes" 13 "fmt" 14 "os" 15 "time" 16 17 "github.com/google/syzkaller/pkg/mgrconfig" 18 "github.com/google/syzkaller/pkg/osutil" 19 "github.com/google/syzkaller/pkg/report" 20 "github.com/google/syzkaller/vm/vmimpl" 21 22 // Import all VM implementations, so that users only need to import vm. 23 _ "github.com/google/syzkaller/vm/adb" 24 _ "github.com/google/syzkaller/vm/gce" 25 _ "github.com/google/syzkaller/vm/gvisor" 26 _ "github.com/google/syzkaller/vm/isolated" 27 _ "github.com/google/syzkaller/vm/kvm" 28 _ "github.com/google/syzkaller/vm/odroid" 29 _ "github.com/google/syzkaller/vm/qemu" 30) 31 32type Pool struct { 33 impl vmimpl.Pool 34 workdir string 35} 36 37type Instance struct { 38 impl vmimpl.Instance 39 workdir string 40 index int 41} 42 43var ( 44 Shutdown = vmimpl.Shutdown 45 ErrTimeout = vmimpl.ErrTimeout 46) 47 48type BootErrorer interface { 49 BootError() (string, []byte) 50} 51 52func Create(cfg *mgrconfig.Config, debug bool) (*Pool, error) { 53 env := &vmimpl.Env{ 54 Name: cfg.Name, 55 OS: cfg.TargetOS, 56 Arch: cfg.TargetVMArch, 57 Workdir: cfg.Workdir, 58 Image: cfg.Image, 59 SSHKey: cfg.SSHKey, 60 SSHUser: cfg.SSHUser, 61 Debug: debug, 62 Config: cfg.VM, 63 } 64 impl, err := vmimpl.Create(cfg.Type, env) 65 if err != nil { 66 return nil, err 67 } 68 return &Pool{ 69 impl: impl, 70 workdir: env.Workdir, 71 }, nil 72} 73 74func (pool *Pool) Count() int { 75 return pool.impl.Count() 76} 77 78func (pool *Pool) Create(index int) (*Instance, error) { 79 if index < 0 || index >= pool.Count() { 80 return nil, fmt.Errorf("invalid VM index %v (count %v)", index, pool.Count()) 81 } 82 workdir, err := osutil.ProcessTempDir(pool.workdir) 83 if err != nil { 84 return nil, fmt.Errorf("failed to create instance temp dir: %v", err) 85 } 86 impl, err := pool.impl.Create(workdir, index) 87 if err != nil { 88 os.RemoveAll(workdir) 89 return nil, err 90 } 91 return &Instance{ 92 impl: impl, 93 workdir: workdir, 94 index: index, 95 }, nil 96} 97 98func (inst *Instance) Copy(hostSrc string) (string, error) { 99 return inst.impl.Copy(hostSrc) 100} 101 102func (inst *Instance) Forward(port int) (string, error) { 103 return inst.impl.Forward(port) 104} 105 106func (inst *Instance) Run(timeout time.Duration, stop <-chan bool, command string) ( 107 outc <-chan []byte, errc <-chan error, err error) { 108 return inst.impl.Run(timeout, stop, command) 109} 110 111func (inst *Instance) Diagnose() bool { 112 return inst.impl.Diagnose() 113} 114 115func (inst *Instance) Close() { 116 inst.impl.Close() 117 os.RemoveAll(inst.workdir) 118} 119 120// MonitorExecution monitors execution of a program running inside of a VM. 121// It detects kernel oopses in output, lost connections, hangs, etc. 122// outc/errc is what vm.Instance.Run returns, reporter parses kernel output for oopses. 123// If canExit is false and the program exits, it is treated as an error. 124// Returns a non-symbolized crash report, or nil if no error happens. 125func (inst *Instance) MonitorExecution(outc <-chan []byte, errc <-chan error, 126 reporter report.Reporter, canExit bool) (rep *report.Report) { 127 mon := &monitor{ 128 inst: inst, 129 outc: outc, 130 errc: errc, 131 reporter: reporter, 132 canExit: canExit, 133 } 134 lastExecuteTime := time.Now() 135 ticker := time.NewTicker(10 * time.Second) 136 defer ticker.Stop() 137 for { 138 select { 139 case err := <-errc: 140 switch err { 141 case nil: 142 // The program has exited without errors, 143 // but wait for kernel output in case there is some delayed oops. 144 return mon.extractError("") 145 case ErrTimeout: 146 return nil 147 default: 148 // Note: connection lost can race with a kernel oops message. 149 // In such case we want to return the kernel oops. 150 return mon.extractError("lost connection to test machine") 151 } 152 case out := <-outc: 153 lastPos := len(mon.output) 154 mon.output = append(mon.output, out...) 155 if bytes.Contains(mon.output[lastPos:], executingProgram1) || 156 bytes.Contains(mon.output[lastPos:], executingProgram2) { 157 lastExecuteTime = time.Now() 158 } 159 if reporter.ContainsCrash(mon.output[mon.matchPos:]) { 160 return mon.extractError("unknown error") 161 } 162 if len(mon.output) > 2*beforeContext { 163 copy(mon.output, mon.output[len(mon.output)-beforeContext:]) 164 mon.output = mon.output[:beforeContext] 165 } 166 mon.matchPos = len(mon.output) - maxErrorLength 167 if mon.matchPos < 0 { 168 mon.matchPos = 0 169 } 170 case <-ticker.C: 171 // Detect both "not output whatsoever" and "kernel episodically prints 172 // something to console, but fuzzer is not actually executing programs". 173 // The timeout used to be 3 mins for a long time. 174 // But (1) we were seeing flakes on linux where net namespace 175 // destruction can be really slow, and (2) gVisor watchdog timeout 176 // is 3 mins + 1/4 of that for checking period = 3m45s. 177 // Current linux max timeout is CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=140 178 // and workqueue.watchdog_thresh=140 which both actually result 179 // in 140-280s detection delay. 180 // So the current timeout is 5 mins (300s). 181 // We don't want it to be too long too because it will waste time on real hangs. 182 if time.Since(lastExecuteTime) < 5*time.Minute { 183 break 184 } 185 if inst.Diagnose() { 186 mon.waitForOutput() 187 } 188 rep := &report.Report{ 189 Title: "no output from test machine", 190 Output: mon.output, 191 Suppressed: report.IsSuppressed(mon.reporter, mon.output), 192 } 193 return rep 194 case <-Shutdown: 195 return nil 196 } 197 } 198} 199 200type monitor struct { 201 inst *Instance 202 outc <-chan []byte 203 errc <-chan error 204 reporter report.Reporter 205 canExit bool 206 output []byte 207 matchPos int 208} 209 210func (mon *monitor) extractError(defaultError string) *report.Report { 211 // Give it some time to finish writing the error message. 212 mon.inst.Diagnose() 213 mon.waitForOutput() 214 if bytes.Contains(mon.output, []byte("SYZ-FUZZER: PREEMPTED")) { 215 return nil 216 } 217 if !mon.reporter.ContainsCrash(mon.output[mon.matchPos:]) { 218 if defaultError == "" { 219 if mon.canExit { 220 return nil 221 } 222 defaultError = "lost connection to test machine" 223 } 224 rep := &report.Report{ 225 Title: defaultError, 226 Output: mon.output, 227 Suppressed: report.IsSuppressed(mon.reporter, mon.output), 228 } 229 return rep 230 } 231 rep := mon.reporter.Parse(mon.output[mon.matchPos:]) 232 if rep == nil { 233 panic(fmt.Sprintf("reporter.ContainsCrash/Parse disagree:\n%s", mon.output[mon.matchPos:])) 234 } 235 start := mon.matchPos + rep.StartPos - beforeContext 236 if start < 0 { 237 start = 0 238 } 239 end := mon.matchPos + rep.EndPos + afterContext 240 if end > len(mon.output) { 241 end = len(mon.output) 242 } 243 rep.Output = mon.output[start:end] 244 rep.StartPos += mon.matchPos - start 245 rep.EndPos += mon.matchPos - start 246 return rep 247} 248 249func (mon *monitor) waitForOutput() { 250 timer := time.NewTimer(10 * time.Second) 251 for { 252 select { 253 case out, ok := <-mon.outc: 254 if !ok { 255 timer.Stop() 256 return 257 } 258 mon.output = append(mon.output, out...) 259 case <-timer.C: 260 return 261 } 262 } 263} 264 265const ( 266 beforeContext = 1024 << 10 267 afterContext = 128 << 10 268 maxErrorLength = 512 269) 270 271var ( 272 executingProgram1 = []byte("executing program") // syz-fuzzer output 273 executingProgram2 = []byte("executed programs:") // syz-execprog output 274) 275