// Copyright 2016 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// Package gce allows using Google Compute Engine (GCE) virtual machines as VMs.
// It is assumed that syz-manager also runs on GCE, as VMs are created in the current project/zone.
//
// See https://cloud.google.com/compute/docs for details.
// In particular, how to build GCE-compatible images:
// https://cloud.google.com/compute/docs/tutorials/building-images
// Working with serial console:
// https://cloud.google.com/compute/docs/instances/interacting-with-serial-console
package gce

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"time"

	"github.com/google/syzkaller/pkg/config"
	"github.com/google/syzkaller/pkg/gce"
	"github.com/google/syzkaller/pkg/gcs"
	"github.com/google/syzkaller/pkg/kd"
	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/vm/vmimpl"
)

func init() {
	vmimpl.Register("gce", ctor)
}

type Config struct {
	Count       int    `json:"count"`        // number of VMs to use
	MachineType string `json:"machine_type"` // GCE machine type (e.g. "n1-highcpu-2")
	GCSPath     string `json:"gcs_path"`     // GCS path to upload image
	GCEImage    string `json:"gce_image"`    // pre-created GCE image to use
}

type Pool struct {
	env *vmimpl.Env
	cfg *Config
	GCE *gce.Context
}

type instance struct {
	env     *vmimpl.Env
	cfg     *Config
	GCE     *gce.Context
	debug   bool
	name    string
	ip      string
	gceKey  string // per-instance private ssh key associated with the instance
	sshKey  string // ssh key
	sshUser string
	closed  chan bool
}

func ctor(env *vmimpl.Env) (vmimpl.Pool, error) {
	if env.Name == "" {
		return nil, fmt.Errorf("config param name is empty (required for GCE)")
	}
	cfg := &Config{
		Count: 1,
	}
	if err := config.LoadData(env.Config, cfg); err != nil {
		return nil, fmt.Errorf("failed to parse gce vm config: %v", err)
	}
	if cfg.Count < 1 || cfg.Count > 1000 {
		return nil, fmt.Errorf("invalid config param count: %v, want [1, 1000]", cfg.Count)
	}
	if env.Debug {
		cfg.Count = 1
	}
	if cfg.MachineType == "" {
		return nil, fmt.Errorf("machine_type parameter is empty")
	}
	if cfg.GCEImage == "" && cfg.GCSPath == "" {
		return nil, fmt.Errorf("gcs_path parameter is empty")
	}
	if cfg.GCEImage == "" && env.Image == "" {
		return nil, fmt.Errorf("config param image is empty (required for GCE)")
	}
	if cfg.GCEImage != "" && env.Image != "" {
		return nil, fmt.Errorf("both image and gce_image are specified")
	}

	GCE, err := gce.NewContext()
	if err != nil {
		return nil, fmt.Errorf("failed to init gce: %v", err)
	}
	log.Logf(0, "GCE initialized: running on %v, internal IP %v, project %v, zone %v, net %v/%v",
		GCE.Instance, GCE.InternalIP, GCE.ProjectID, GCE.ZoneID, GCE.Network, GCE.Subnetwork)

	if cfg.GCEImage == "" {
		cfg.GCEImage = env.Name
		gcsImage := filepath.Join(cfg.GCSPath, env.Name+"-image.tar.gz")
		log.Logf(0, "uploading image to %v...", gcsImage)
		if err := uploadImageToGCS(env.Image, gcsImage); err != nil {
			return nil, err
		}
		log.Logf(0, "creating GCE image %v...", cfg.GCEImage)
		if err := GCE.DeleteImage(cfg.GCEImage); err != nil {
			return nil, fmt.Errorf("failed to delete GCE image: %v", err)
		}
		if err := GCE.CreateImage(cfg.GCEImage, gcsImage); err != nil {
			return nil, fmt.Errorf("failed to create GCE image: %v", err)
		}
	}
	pool := &Pool{
		cfg: cfg,
		env: env,
		GCE: GCE,
	}
	return pool, nil
}

func (pool *Pool) Count() int {
	return pool.cfg.Count
}

func (pool *Pool) Create(workdir string, index int) (vmimpl.Instance, error) {
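	// Overview of the provisioning flow below: generate a fresh per-instance
	// ssh key, delete any stale instance left over under the same name, create
	// a new instance from the configured image, then wait for ssh to come up
	// before handing the instance to the caller. On boot failure the serial
	// port output is attached to the returned BootError to aid debugging.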
name := fmt.Sprintf("%v-%v", pool.env.Name, index) // Create SSH key for the instance. gceKey := filepath.Join(workdir, "key") keygen := osutil.Command("ssh-keygen", "-t", "rsa", "-b", "2048", "-N", "", "-C", "syzkaller", "-f", gceKey) if out, err := keygen.CombinedOutput(); err != nil { return nil, fmt.Errorf("failed to execute ssh-keygen: %v\n%s", err, out) } gceKeyPub, err := ioutil.ReadFile(gceKey + ".pub") if err != nil { return nil, fmt.Errorf("failed to read file: %v", err) } log.Logf(0, "deleting instance: %v", name) if err := pool.GCE.DeleteInstance(name, true); err != nil { return nil, err } log.Logf(0, "creating instance: %v", name) ip, err := pool.GCE.CreateInstance(name, pool.cfg.MachineType, pool.cfg.GCEImage, string(gceKeyPub)) if err != nil { return nil, err } ok := false defer func() { if !ok { pool.GCE.DeleteInstance(name, true) } }() sshKey := pool.env.SSHKey sshUser := pool.env.SSHUser if sshKey == "" { // Assuming image supports GCE ssh fanciness. sshKey = gceKey sshUser = "syzkaller" } log.Logf(0, "wait instance to boot: %v (%v)", name, ip) if err := vmimpl.WaitForSSH(pool.env.Debug, 5*time.Minute, ip, sshKey, sshUser, pool.env.OS, 22); err != nil { output, outputErr := pool.getSerialPortOutput(name, gceKey) if outputErr != nil { output = []byte(fmt.Sprintf("failed to get boot output: %v", outputErr)) } return nil, vmimpl.BootError{Title: err.Error(), Output: output} } ok = true inst := &instance{ env: pool.env, cfg: pool.cfg, debug: pool.env.Debug, GCE: pool.GCE, name: name, ip: ip, gceKey: gceKey, sshKey: sshKey, sshUser: sshUser, closed: make(chan bool), } return inst, nil } func (inst *instance) Close() { close(inst.closed) inst.GCE.DeleteInstance(inst.name, false) } func (inst *instance) Forward(port int) (string, error) { return fmt.Sprintf("%v:%v", inst.GCE.InternalIP, port), nil } func (inst *instance) Copy(hostSrc string) (string, error) { vmDst := "./" + filepath.Base(hostSrc) args := append(vmimpl.SCPArgs(inst.debug, inst.sshKey, 22), hostSrc, inst.sshUser+"@"+inst.ip+":"+vmDst) if err := runCmd(inst.debug, "scp", args...); err != nil { return "", err } return vmDst, nil } func (inst *instance) Run(timeout time.Duration, stop <-chan bool, command string) ( <-chan []byte, <-chan error, error) { conRpipe, conWpipe, err := osutil.LongPipe() if err != nil { return nil, nil, err } conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com", inst.GCE.ProjectID, inst.GCE.ZoneID, inst.name) conArgs := append(vmimpl.SSHArgs(inst.debug, inst.gceKey, 9600), conAddr) con := osutil.Command("ssh", conArgs...) 
	con.Env = []string{}
	con.Stdout = conWpipe
	con.Stderr = conWpipe
	if _, err := con.StdinPipe(); err != nil { // SSH would close connection on stdin EOF
		conRpipe.Close()
		conWpipe.Close()
		return nil, nil, err
	}
	if err := con.Start(); err != nil {
		conRpipe.Close()
		conWpipe.Close()
		return nil, nil, fmt.Errorf("failed to connect to console server: %v", err)
	}
	conWpipe.Close()

	var tee io.Writer
	if inst.debug {
		tee = os.Stdout
	}
	merger := vmimpl.NewOutputMerger(tee)
	var decoder func(data []byte) (int, int, []byte)
	if inst.env.OS == "windows" {
		decoder = kd.Decode
	}
	merger.AddDecoder("console", conRpipe, decoder)
	if err := waitForConsoleConnect(merger); err != nil {
		con.Process.Kill()
		merger.Wait()
		return nil, nil, err
	}

	sshRpipe, sshWpipe, err := osutil.LongPipe()
	if err != nil {
		con.Process.Kill()
		merger.Wait()
		sshRpipe.Close()
		return nil, nil, err
	}
	if inst.env.OS == "linux" {
		if inst.sshUser != "root" {
			command = fmt.Sprintf("sudo bash -c '%v'", command)
		}
	}
	args := append(vmimpl.SSHArgs(inst.debug, inst.sshKey, 22), inst.sshUser+"@"+inst.ip, command)
	ssh := osutil.Command("ssh", args...)
	ssh.Stdout = sshWpipe
	ssh.Stderr = sshWpipe
	if err := ssh.Start(); err != nil {
		con.Process.Kill()
		merger.Wait()
		sshRpipe.Close()
		sshWpipe.Close()
		return nil, nil, fmt.Errorf("failed to connect to instance: %v", err)
	}
	sshWpipe.Close()
	merger.Add("ssh", sshRpipe)

	errc := make(chan error, 1)
	signal := func(err error) {
		select {
		case errc <- err:
		default:
		}
	}

	go func() {
		select {
		case <-time.After(timeout):
			signal(vmimpl.ErrTimeout)
		case <-stop:
			signal(vmimpl.ErrTimeout)
		case <-inst.closed:
			signal(fmt.Errorf("instance closed"))
		case err := <-merger.Err:
			con.Process.Kill()
			ssh.Process.Kill()
			merger.Wait()
			con.Wait()
			if cmdErr := ssh.Wait(); cmdErr == nil {
				// If the command exited successfully, we got EOF error from merger.
				// But in this case no error has happened and the EOF is expected.
				err = nil
			} else if merr, ok := err.(vmimpl.MergerError); ok && merr.R == conRpipe {
				// Console connection must never fail. If it does, it's either
				// instance preemption or a GCE bug. In either case, not a kernel bug.
				log.Logf(1, "%v: gce console connection failed with %v", inst.name, merr.Err)
				err = vmimpl.ErrTimeout
			} else {
				// Check if the instance was terminated due to preemption or host maintenance.
				time.Sleep(5 * time.Second) // just to avoid any GCE races
				if !inst.GCE.IsInstanceRunning(inst.name) {
					log.Logf(1, "%v: ssh exited but instance is not running", inst.name)
					err = vmimpl.ErrTimeout
				}
			}
			signal(err)
			return
		}
		con.Process.Kill()
		ssh.Process.Kill()
		merger.Wait()
		con.Wait()
		ssh.Wait()
	}()
	return merger.Output, errc, nil
}

func waitForConsoleConnect(merger *vmimpl.OutputMerger) error {
	// We've started the console reading ssh command, but it has not necessarily connected yet.
	// If we proceed to running the target command right away, we can miss part
	// of console output. During repro we can crash machines very quickly and
	// would miss the beginning of a crash. Before ssh starts piping console output,
	// it usually prints:
	// "serialport: Connected to ... port 1 (session ID: ..., active connections: 1)"
	// So we wait for this line, or at least a minute and at least some output.
	timeout := time.NewTimer(time.Minute)
	defer timeout.Stop()
	connectedMsg := []byte("serialport: Connected")
	permissionDeniedMsg := []byte("Permission denied (publickey)")
	var output []byte
	for {
		select {
		case out := <-merger.Output:
			output = append(output, out...)
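			// Accumulate console output and scan it for either the
			// "serialport: Connected" banner (connection established) or a
			// publickey failure (console is unusable).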
			if bytes.Contains(output, connectedMsg) {
				// Just to make sure (otherwise we still see trimmed reports).
				time.Sleep(5 * time.Second)
				return nil
			}
			if bytes.Contains(output, permissionDeniedMsg) {
				// This is a GCE bug.
				return fmt.Errorf("broken console: %s", permissionDeniedMsg)
			}
		case <-timeout.C:
			if len(output) == 0 {
				return fmt.Errorf("broken console: no output")
			}
			return nil
		}
	}
}

func (inst *instance) Diagnose() bool {
	return false
}

func (pool *Pool) getSerialPortOutput(name, gceKey string) ([]byte, error) {
	conRpipe, conWpipe, err := osutil.LongPipe()
	if err != nil {
		return nil, err
	}
	defer conRpipe.Close()
	defer conWpipe.Close()
	conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1.replay-lines=10000@ssh-serialport.googleapis.com",
		pool.GCE.ProjectID, pool.GCE.ZoneID, name)
	conArgs := append(vmimpl.SSHArgs(pool.env.Debug, gceKey, 9600), conAddr)
	con := osutil.Command("ssh", conArgs...)
	con.Env = []string{}
	con.Stdout = conWpipe
	con.Stderr = conWpipe
	if _, err := con.StdinPipe(); err != nil { // SSH would close connection on stdin EOF
		return nil, err
	}
	if err := con.Start(); err != nil {
		return nil, fmt.Errorf("failed to connect to console server: %v", err)
	}
	conWpipe.Close()
	done := make(chan bool)
	go func() {
		timeout := time.NewTimer(time.Minute)
		defer timeout.Stop()
		select {
		case <-done:
		case <-timeout.C:
		}
		con.Process.Kill()
	}()
	var output []byte
	buf := make([]byte, 64<<10)
	for {
		n, err := conRpipe.Read(buf)
		if err != nil || n == 0 {
			break
		}
		output = append(output, buf[:n]...)
	}
	close(done)
	con.Wait()
	return output, nil
}

func uploadImageToGCS(localImage, gcsImage string) error {
	GCS, err := gcs.NewClient()
	if err != nil {
		return fmt.Errorf("failed to create GCS client: %v", err)
	}
	defer GCS.Close()

	localReader, err := os.Open(localImage)
	if err != nil {
		return fmt.Errorf("failed to open image file: %v", err)
	}
	defer localReader.Close()
	localStat, err := localReader.Stat()
	if err != nil {
		return fmt.Errorf("failed to stat image file: %v", err)
	}

	gcsWriter, err := GCS.FileWriter(gcsImage)
	if err != nil {
		return fmt.Errorf("failed to upload image: %v", err)
	}
	defer gcsWriter.Close()

	gzipWriter := gzip.NewWriter(gcsWriter)
	tarWriter := tar.NewWriter(gzipWriter)
	tarHeader := &tar.Header{
		Name:     "disk.raw",
		Typeflag: tar.TypeReg,
		Mode:     0640,
		Size:     localStat.Size(),
		ModTime:  time.Now(),
		// This is hacky but we actually need these large uids.
		// GCE understands only the old GNU tar format and
		// there is no direct way to force tar package to use GNU format.
		// But these large numbers force tar to switch to GNU format.
		Uid:   100000000,
		Gid:   100000000,
		Uname: "syzkaller",
		Gname: "syzkaller",
	}
	if err := tarWriter.WriteHeader(tarHeader); err != nil {
		return fmt.Errorf("failed to write image tar header: %v", err)
	}
	if _, err := io.Copy(tarWriter, localReader); err != nil {
		return fmt.Errorf("failed to write image file: %v", err)
	}
	if err := tarWriter.Close(); err != nil {
		return fmt.Errorf("failed to write image file: %v", err)
	}
	if err := gzipWriter.Close(); err != nil {
		return fmt.Errorf("failed to write image file: %v", err)
	}
	if err := gcsWriter.Close(); err != nil {
		return fmt.Errorf("failed to write image file: %v", err)
	}
	return nil
}

func runCmd(debug bool, bin string, args ...string) error {
	if debug {
		log.Logf(0, "running command: %v %#v", bin, args)
	}
	output, err := osutil.RunCmd(time.Minute, "", bin, args...)
	if debug {
		log.Logf(0, "result: %v\n%s", err, output)
	}
	return err
}
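// For reference, uploadImageToGCS produces the layout that GCE image import
// expects: a gzip-compressed old-GNU-format tar containing a single file named
// "disk.raw". A roughly equivalent manual preparation (illustrative only, not
// used by this package; bucket and path are placeholders) would be:
//
//	cp image disk.raw
//	tar --format=oldgnu -czf image.tar.gz disk.raw
//	gsutil cp image.tar.gz gs://<bucket>/<path>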