• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 mod process;
6 mod vcpu;
7 
8 use std::fmt::{self, Display};
9 use std::fs::File;
10 use std::io;
11 use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd};
12 use std::os::unix::net::UnixDatagram;
13 use std::path::Path;
14 use std::result;
15 use std::sync::atomic::{AtomicBool, Ordering};
16 use std::sync::{Arc, Barrier};
17 use std::thread;
18 use std::time::{Duration, Instant};
19 
20 use libc::{
21     c_int, c_ulong, fcntl, ioctl, socketpair, AF_UNIX, EAGAIN, EBADF, EDEADLK, EEXIST, EINTR,
22     EINVAL, ENOENT, EOVERFLOW, EPERM, FIOCLEX, F_SETPIPE_SZ, MS_NODEV, MS_NOEXEC, MS_NOSUID,
23     SIGCHLD, SOCK_SEQPACKET,
24 };
25 
26 use protobuf::ProtobufError;
27 use remain::sorted;
28 
29 use io_jail::{self, Minijail};
30 use kvm::{Datamatch, IoeventAddress, Kvm, Vcpu, VcpuExit, Vm};
31 use net_util::{Error as TapError, Tap, TapT};
32 use sys_util::{
33     block_signal, clear_signal, drop_capabilities, error, getegid, geteuid, info, pipe,
34     register_signal_handler, validate_raw_fd, warn, Error as SysError, EventFd, GuestMemory,
35     Killable, MmapError, PollContext, PollToken, Result as SysResult, SignalFd, SignalFdError,
36     SIGRTMIN,
37 };
38 
39 use self::process::*;
40 use self::vcpu::*;
41 use crate::{Config, Executable};
42 
// NOTE(review): not referenced in the visible part of this file; presumably an upper bound on
// messages exchanged over the plugin request sockets — confirm against the `process` module.
const MAX_DATAGRAM_SIZE: usize = 4096;
/// Capacity requested via `F_SETPIPE_SZ` for the vcpu pipes created by `new_pipe_pair`.
const MAX_VCPU_DATAGRAM_SIZE: usize = 0x40000;
45 
/// An error that occurs during the lifetime of a plugin process.
///
/// The `#[sorted]` attribute (from the `remain` crate) checks at compile time that the variants
/// stay in sorted order, so new variants must be inserted at their sorted position.
#[sorted]
pub enum Error {
    /// Failed to clone an eventfd.
    CloneEventFd(SysError),
    /// Failed to clone a vcpu pipe.
    CloneVcpuPipe(io::Error),
    /// Failed to create an eventfd.
    CreateEventFd(SysError),
    /// Failed to create the KVM irqchip.
    CreateIrqChip(SysError),
    /// Failed to create the jail.
    CreateJail(io_jail::Error),
    /// Failed to create the `Kvm` object.
    CreateKvm(SysError),
    /// Failed to create the main request socket.
    CreateMainSocket(SysError),
    /// Failed to create the KVM PIT.
    CreatePIT(SysError),
    /// Failed to create the poll context.
    CreatePollContext(SysError),
    /// Failed to create the signalfd.
    CreateSignalFd(SignalFdError),
    /// Failed to create a socket pair.
    CreateSocketPair(io::Error),
    /// Failed to create a tap device from a raw fd.
    CreateTapFd(TapError),
    /// Failed to create a vcpu.
    CreateVcpu(SysError),
    /// Failed to create a vcpu request socket.
    CreateVcpuSocket(SysError),
    /// Failed to create the VM.
    CreateVm(SysError),
    /// Failed to decode a plugin request.
    DecodeRequest(ProtobufError),
    /// Failed to drop process capabilities.
    DropCapabilities(SysError),
    /// Failed to encode a plugin response.
    EncodeResponse(ProtobufError),
    /// Failed to mount; used for the caller-configured plugin bind mounts.
    Mount(io_jail::Error),
    /// Failed to mount; used for the bind mounts of the `/dev` device nodes.
    MountDev(io_jail::Error),
    /// Failed to mount (displayed with the same generic mount message).
    MountLib(io_jail::Error),
    /// Failed to mount (displayed with the same generic mount message).
    MountLib64(io_jail::Error),
    /// Failed to mount (displayed with the same generic mount message).
    MountPlugin(io_jail::Error),
    /// Failed to mount (displayed with the same generic mount message).
    MountPluginLib(io_jail::Error),
    /// Failed to mount; used for the tmpfs mounted at the jail's root.
    MountRoot(io_jail::Error),
    /// There is no root directory for the jailed process to pivot root into.
    NoRootDir,
    /// Failed to set the jail's pivot root.
    ParsePivotRoot(io_jail::Error),
    /// Failed to parse the jail's seccomp filter.
    ParseSeccomp(io_jail::Error),
    /// The plugin exited with the contained error value.
    PluginFailed(i32),
    /// Failed to send the kill signal to the plugin.
    PluginKill(SysError),
    /// The plugin exited because of the contained signal.
    PluginKilled(i32),
    /// Failed to run the jail.
    PluginRunJail(io_jail::Error),
    /// The plugin request socket has been hung up.
    PluginSocketHup,
    /// Failed to poll the plugin request sockets.
    PluginSocketPoll(SysError),
    /// Failed to receive from a plugin request socket.
    PluginSocketRecv(SysError),
    /// Failed to send to a plugin request socket.
    PluginSocketSend(SysError),
    /// Failed to spawn the plugin.
    PluginSpawn(io::Error),
    /// The plugin did not exit within the timeout.
    PluginTimeout,
    /// Failed while waiting for the plugin to exit.
    PluginWait(SysError),
    /// Failed to poll all FDs.
    Poll(SysError),
    /// Failed to add an fd to the poll context.
    PollContextAdd(SysError),
    /// The path to the root directory is not absolute.
    RootNotAbsolute,
    /// The specified root directory is not a directory.
    RootNotDir,
    /// Failed to set the gidmap for the jail.
    SetGidMap(io_jail::Error),
    /// Failed to set the uidmap for the jail.
    SetUidMap(io_jail::Error),
    /// A process died; the fields are copied from the `SIGCHLD` siginfo that was read.
    SigChild {
        /// `ssi_pid` of the siginfo.
        pid: u32,
        /// `ssi_signo` of the siginfo.
        signo: u32,
        /// `ssi_status` of the siginfo.
        status: i32,
        /// `ssi_code` of the siginfo.
        code: i32,
    },
    /// Failed to read the signal fd.
    SignalFd(SignalFdError),
    /// Failed to spawn a vcpu thread.
    SpawnVcpu(io::Error),
    /// Failed to enable the tap device.
    TapEnable(TapError),
    /// Failed to open the tap device.
    TapOpen(TapError),
    /// Failed to set the tap device's IP address.
    TapSetIp(TapError),
    /// Failed to set the tap device's MAC address.
    TapSetMacAddress(TapError),
    /// Failed to set the tap device's netmask.
    TapSetNetmask(TapError),
    /// Failed to validate a raw tap fd.
    ValidateTapFd(SysError),
}
109 
110 impl Display for Error {
111     #[remain::check]
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result112     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113         use self::Error::*;
114 
115         #[sorted]
116         match self {
117             CloneEventFd(e) => write!(f, "failed to clone eventfd: {}", e),
118             CloneVcpuPipe(e) => write!(f, "failed to clone vcpu pipe: {}", e),
119             CreateEventFd(e) => write!(f, "failed to create eventfd: {}", e),
120             CreateIrqChip(e) => write!(f, "failed to create kvm irqchip: {}", e),
121             CreateJail(e) => write!(f, "failed to create jail: {}", e),
122             CreateKvm(e) => write!(f, "error creating Kvm: {}", e),
123             CreateMainSocket(e) => write!(f, "error creating main request socket: {}", e),
124             CreatePIT(e) => write!(f, "failed to create kvm PIT: {}", e),
125             CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
126             CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
127             CreateSocketPair(e) => write!(f, "failed to create socket pair: {}", e),
128             CreateTapFd(e) => write!(f, "failed to create tap device from raw fd: {}", e),
129             CreateVcpu(e) => write!(f, "error creating vcpu: {}", e),
130             CreateVcpuSocket(e) => write!(f, "error creating vcpu request socket: {}", e),
131             CreateVm(e) => write!(f, "error creating vm: {}", e),
132             DecodeRequest(e) => write!(f, "failed to decode plugin request: {}", e),
133             DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
134             EncodeResponse(e) => write!(f, "failed to encode plugin response: {}", e),
135             Mount(e) | MountDev(e) | MountLib(e) | MountLib64(e) | MountPlugin(e)
136             | MountPluginLib(e) | MountRoot(e) => write!(f, "failed to mount: {}", e),
137             NoRootDir => write!(f, "no root directory for jailed process to pivot root into"),
138             ParsePivotRoot(e) => write!(f, "failed to set jail pivot root: {}", e),
139             ParseSeccomp(e) => write!(f, "failed to parse jail seccomp filter: {}", e),
140             PluginFailed(e) => write!(f, "plugin exited with error: {}", e),
141             PluginKill(e) => write!(f, "error sending kill signal to plugin: {}", e),
142             PluginKilled(e) => write!(f, "plugin exited with signal {}", e),
143             PluginRunJail(e) => write!(f, "failed to run jail: {}", e),
144             PluginSocketHup => write!(f, "plugin request socket has been hung up"),
145             PluginSocketPoll(e) => write!(f, "failed to poll plugin request sockets: {}", e),
146             PluginSocketRecv(e) => write!(f, "failed to recv from plugin request socket: {}", e),
147             PluginSocketSend(e) => write!(f, "failed to send to plugin request socket: {}", e),
148             PluginSpawn(e) => write!(f, "failed to spawn plugin: {}", e),
149             PluginTimeout => write!(f, "plugin did not exit within timeout"),
150             PluginWait(e) => write!(f, "error waiting for plugin to exit: {}", e),
151             Poll(e) => write!(f, "failed to poll all FDs: {}", e),
152             PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
153             RootNotAbsolute => write!(f, "path to the root directory must be absolute"),
154             RootNotDir => write!(f, "specified root directory is not a directory"),
155             SetGidMap(e) => write!(f, "failed to set gidmap for jail: {}", e),
156             SetUidMap(e) => write!(f, "failed to set uidmap for jail: {}", e),
157             SigChild {
158                 pid,
159                 signo,
160                 status,
161                 code,
162             } => write!(
163                 f,
164                 "process {} died with signal {}, status {}, and code {}",
165                 pid, signo, status, code
166             ),
167             SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
168             SpawnVcpu(e) => write!(f, "error spawning vcpu thread: {}", e),
169             TapEnable(e) => write!(f, "error enabling tap device: {}", e),
170             TapOpen(e) => write!(f, "error opening tap device: {}", e),
171             TapSetIp(e) => write!(f, "error setting tap ip: {}", e),
172             TapSetMacAddress(e) => write!(f, "error setting tap mac address: {}", e),
173             TapSetNetmask(e) => write!(f, "error setting tap netmask: {}", e),
174             ValidateTapFd(e) => write!(f, "failed to validate raw tap fd: {}", e),
175         }
176     }
177 }
178 
/// Shorthand for a `Result` whose error type is this module's `Error`.
type Result<T> = result::Result<T, Error>;
180 
/// Turns any value that can give up its raw file descriptor into a `File` that owns that
/// descriptor.
fn downcast_file<F: IntoRawFd>(f: F) -> File {
    let raw_fd = f.into_raw_fd();
    // SAFETY: `into_raw_fd` released ownership of the descriptor to us, so the `File` built
    // here becomes its only owner.
    unsafe { File::from_raw_fd(raw_fd) }
}
184 
new_seqpacket_pair() -> SysResult<(UnixDatagram, UnixDatagram)>185 fn new_seqpacket_pair() -> SysResult<(UnixDatagram, UnixDatagram)> {
186     let mut fds = [0, 0];
187     unsafe {
188         let ret = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds.as_mut_ptr());
189         if ret == 0 {
190             ioctl(fds[0], FIOCLEX);
191             Ok((
192                 UnixDatagram::from_raw_fd(fds[0]),
193                 UnixDatagram::from_raw_fd(fds[1]),
194             ))
195         } else {
196             Err(SysError::last())
197         }
198     }
199 }
200 
/// The four ends of the two pipes created by `new_pipe_pair`, named after the side that is
/// meant to use each end.
struct VcpuPipe {
    /// First element of the `to_crosvm` pipe (the read end, going by the field name).
    crosvm_read: File,
    /// Second element of the `to_crosvm` pipe (the write end, going by the field name).
    plugin_write: File,
    /// First element of the `to_plugin` pipe (the read end, going by the field name).
    plugin_read: File,
    /// Second element of the `to_plugin` pipe (the write end, going by the field name).
    crosvm_write: File,
}
207 
new_pipe_pair() -> SysResult<VcpuPipe>208 fn new_pipe_pair() -> SysResult<VcpuPipe> {
209     let to_crosvm = pipe(true)?;
210     let to_plugin = pipe(true)?;
211     // Increasing the pipe size can be a nice-to-have to make sure that
212     // messages get across atomically (and made sure that writes don't block),
213     // though it's not necessary a hard requirement for things to work.
214     let flags = unsafe {
215         fcntl(
216             to_crosvm.0.as_raw_fd(),
217             F_SETPIPE_SZ,
218             MAX_VCPU_DATAGRAM_SIZE as c_int,
219         )
220     };
221     if flags < 0 || flags != MAX_VCPU_DATAGRAM_SIZE as i32 {
222         warn!(
223             "Failed to adjust size of crosvm pipe (result {}): {}",
224             flags,
225             SysError::last()
226         );
227     }
228     let flags = unsafe {
229         fcntl(
230             to_plugin.0.as_raw_fd(),
231             F_SETPIPE_SZ,
232             MAX_VCPU_DATAGRAM_SIZE as c_int,
233         )
234     };
235     if flags < 0 || flags != MAX_VCPU_DATAGRAM_SIZE as i32 {
236         warn!(
237             "Failed to adjust size of plugin pipe (result {}): {}",
238             flags,
239             SysError::last()
240         );
241     }
242     Ok(VcpuPipe {
243         crosvm_read: to_crosvm.0,
244         plugin_write: to_crosvm.1,
245         plugin_read: to_plugin.0,
246         crosvm_write: to_plugin.1,
247     })
248 }
249 
proto_to_sys_err(e: ProtobufError) -> SysError250 fn proto_to_sys_err(e: ProtobufError) -> SysError {
251     match e {
252         ProtobufError::IoError(e) => SysError::new(e.raw_os_error().unwrap_or(EINVAL)),
253         _ => SysError::new(EINVAL),
254     }
255 }
256 
io_to_sys_err(e: io::Error) -> SysError257 fn io_to_sys_err(e: io::Error) -> SysError {
258     SysError::new(e.raw_os_error().unwrap_or(EINVAL))
259 }
260 
mmap_to_sys_err(e: MmapError) -> SysError261 fn mmap_to_sys_err(e: MmapError) -> SysError {
262     match e {
263         MmapError::SystemCallFailed(e) => e,
264         _ => SysError::new(EINVAL),
265     }
266 }
267 
create_plugin_jail(root: &Path, seccomp_policy: &Path) -> Result<Minijail>268 fn create_plugin_jail(root: &Path, seccomp_policy: &Path) -> Result<Minijail> {
269     // All child jails run in a new user namespace without any users mapped,
270     // they run as nobody unless otherwise configured.
271     let mut j = Minijail::new().map_err(Error::CreateJail)?;
272     j.namespace_pids();
273     j.namespace_user();
274     j.uidmap(&format!("0 {0} 1", geteuid()))
275         .map_err(Error::SetUidMap)?;
276     j.gidmap(&format!("0 {0} 1", getegid()))
277         .map_err(Error::SetGidMap)?;
278     j.namespace_user_disable_setgroups();
279     // Don't need any capabilities.
280     j.use_caps(0);
281     // Create a new mount namespace with an empty root FS.
282     j.namespace_vfs();
283     j.enter_pivot_root(root).map_err(Error::ParsePivotRoot)?;
284     // Run in an empty network namespace.
285     j.namespace_net();
286     j.no_new_privs();
287     // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
288     // the entire plugin process if a worker thread commits a seccomp violation.
289     j.set_seccomp_filter_tsync();
290     #[cfg(debug_assertions)]
291     j.log_seccomp_filter_failures();
292     j.parse_seccomp_filters(seccomp_policy)
293         .map_err(Error::ParseSeccomp)?;
294     j.use_seccomp_filter();
295     // Don't do init setup.
296     j.run_as_init();
297 
298     // Create a tmpfs in the plugin's root directory so that we can bind mount it's executable
299     // file into it.  The size=67108864 is size=64*1024*1024 or size=64MB.
300     j.mount_with_data(
301         Path::new("none"),
302         Path::new("/"),
303         "tmpfs",
304         (MS_NOSUID | MS_NODEV | MS_NOEXEC) as usize,
305         "size=67108864",
306     )
307     .map_err(Error::MountRoot)?;
308 
309     Ok(j)
310 }
311 
/// Each `PluginObject` represents one object that was instantiated by the guest using the `Create`
/// request.
///
/// Each such object has an ID associated with it that exists in an ID space shared by every variant
/// of `PluginObject`. This allows all the objects to be indexed in a single map, and allows for a
/// common destroy method.
///
/// In addition to the destroy method, each object may have methods specific to its variant type.
/// These variant methods must be done by matching the variant to the expected type for that method.
/// For example, getting the dirty log from a `Memory` object starting with an ID:
///
/// ```ignore
/// match objects.get(&request_id) {
///    Some(&PluginObject::Memory { slot, length }) => vm.get_dirty_log(slot, &mut dirty_log[..]),
///    _ => return Err(SysError::new(ENOENT)),
/// }
/// ```
enum PluginObject {
    /// An ioevent registration; `destroy` unregisters it from the `Vm`.
    IoEvent {
        /// Event fd passed back to `unregister_ioevent` on destroy.
        evt: EventFd,
        /// Address passed back to `unregister_ioevent` on destroy.
        addr: IoeventAddress,
        /// Selects the `Datamatch` width on destroy: 0 means any length, and 1, 2, 4, or 8
        /// select the `U8`, `U16`, `U32`, or `U64` variant. Any other value makes `destroy`
        /// fail with `EINVAL`.
        length: u32,
        /// Value to match, truncated to the width selected by `length`.
        datamatch: u64,
    },
    /// A memory region; `destroy` removes the device memory in `slot` from the `Vm`.
    Memory {
        /// Slot passed to `remove_device_memory` on destroy.
        slot: u32,
        // NOTE(review): presumably the size of the region in bytes — not read in the visible
        // part of this file; confirm against the code that creates this variant.
        length: usize,
    },
    /// An irqfd registration; `destroy` unregisters it from the `Vm`.
    IrqEvent {
        /// IRQ id passed to `unregister_irqfd` on destroy.
        irq_id: u32,
        /// Event fd passed to `unregister_irqfd` on destroy.
        evt: EventFd,
    },
}
346 
347 impl PluginObject {
destroy(self, vm: &mut Vm) -> SysResult<()>348     fn destroy(self, vm: &mut Vm) -> SysResult<()> {
349         match self {
350             PluginObject::IoEvent {
351                 evt,
352                 addr,
353                 length,
354                 datamatch,
355             } => match length {
356                 0 => vm.unregister_ioevent(&evt, addr, Datamatch::AnyLength),
357                 1 => vm.unregister_ioevent(&evt, addr, Datamatch::U8(Some(datamatch as u8))),
358                 2 => vm.unregister_ioevent(&evt, addr, Datamatch::U16(Some(datamatch as u16))),
359                 4 => vm.unregister_ioevent(&evt, addr, Datamatch::U32(Some(datamatch as u32))),
360                 8 => vm.unregister_ioevent(&evt, addr, Datamatch::U64(Some(datamatch as u64))),
361                 _ => Err(SysError::new(EINVAL)),
362             },
363             PluginObject::Memory { slot, .. } => vm.remove_device_memory(slot).and(Ok(())),
364             PluginObject::IrqEvent { irq_id, evt } => vm.unregister_irqfd(&evt, irq_id),
365         }
366     }
367 }
368 
/// Spawns one thread per vcpu and runs each vcpu until it halts, shuts down, hits an error, or
/// `kill_signaled` is set.
///
/// For every `cpu_id` in `0..vcpu_count` this clones `exit_evt`, asks `plugin` for the per-vcpu
/// plugin state, creates the KVM `Vcpu`, and spawns a thread named `crosvm_vcpu{cpu_id}` whose
/// join handle is pushed onto `vcpu_handles`. Each thread forwards I/O port and MMIO exits to
/// the plugin and writes to its clone of `exit_evt` when it leaves its run loop.
///
/// # Errors
///
/// Returns `Error::CloneEventFd`, the error from `plugin.create_vcpu`, `Error::CreateVcpu`, or
/// `Error::SpawnVcpu` as soon as setting up any vcpu fails.
///
/// NOTE(review): the barrier is sized for `vcpu_count` threads, so if this function returns
/// early with an error, the threads spawned so far would stay blocked in `wait()` — confirm how
/// callers deal with that.
pub fn run_vcpus(
    kvm: &Kvm,
    vm: &Vm,
    plugin: &Process,
    vcpu_count: u32,
    kill_signaled: &Arc<AtomicBool>,
    exit_evt: &EventFd,
    vcpu_handles: &mut Vec<thread::JoinHandle<()>>,
) -> Result<()> {
    // Every vcpu thread waits on this barrier after its init, so no vcpu starts running before
    // all of them have attempted initialization.
    let vcpu_thread_barrier = Arc::new(Barrier::new((vcpu_count) as usize));
    for cpu_id in 0..vcpu_count {
        let kill_signaled = kill_signaled.clone();
        let vcpu_thread_barrier = vcpu_thread_barrier.clone();
        let vcpu_exit_evt = exit_evt.try_clone().map_err(Error::CloneEventFd)?;
        let vcpu_plugin = plugin.create_vcpu(cpu_id)?;
        let vcpu = Vcpu::new(cpu_id as c_ulong, kvm, vm).map_err(Error::CreateVcpu)?;

        vcpu_handles.push(
            thread::Builder::new()
                .name(format!("crosvm_vcpu{}", cpu_id))
                .spawn(move || {
                    unsafe {
                        extern "C" fn handle_signal() {}
                        // Our signal handler does nothing and is trivially async signal safe.
                        // We need to install this signal handler even though we do block
                        // the signal below, to ensure that this signal will interrupt
                        // execution of KVM_RUN (this is implementation issue).
                        register_signal_handler(SIGRTMIN() + 0, handle_signal)
                            .expect("failed to register vcpu signal handler");
                    }

                    // We do not really want the signal handler to run...
                    block_signal(SIGRTMIN() + 0).expect("failed to block signal");
                    // Tell KVM to not block anything when entering kvm run
                    // because we will be using first RT signal to kick the VCPU.
                    vcpu.set_signal_mask(&[])
                        .expect("failed to set up KVM VCPU signal mask");

                    // The barrier is reached even when init failed, so that the other vcpu
                    // threads are not left waiting for this one.
                    let res = vcpu_plugin.init(&vcpu);
                    vcpu_thread_barrier.wait();
                    if let Err(e) = res {
                        error!("failed to initialize vcpu {}: {}", cpu_id, e);
                    } else {
                        loop {
                            let mut interrupted_by_signal = false;
                            let run_res = vcpu.run();
                            match run_res {
                                Ok(run) => match run {
                                    VcpuExit::IoIn { port, mut size } => {
                                        let mut data = [0; 256];
                                        // Clamp oversized requests to the local buffer instead
                                        // of slicing out of bounds.
                                        if size > data.len() {
                                            error!("unsupported IoIn size of {} bytes", size);
                                            size = data.len();
                                        }
                                        vcpu_plugin.io_read(port as u64, &mut data[..size], &vcpu);
                                        if let Err(e) = vcpu.set_data(&data[..size]) {
                                            error!("failed to set return data for IoIn: {}", e);
                                        }
                                    }
                                    VcpuExit::IoOut {
                                        port,
                                        mut size,
                                        data,
                                    } => {
                                        // Same clamping as for IoIn.
                                        if size > data.len() {
                                            error!("unsupported IoOut size of {} bytes", size);
                                            size = data.len();
                                        }
                                        vcpu_plugin.io_write(port as u64, &data[..size], &vcpu);
                                    }
                                    VcpuExit::MmioRead { address, size } => {
                                        // NOTE(review): unlike the port I/O arms, `size` is not
                                        // clamped here; slicing panics if it exceeds 8.
                                        let mut data = [0; 8];
                                        vcpu_plugin.mmio_read(
                                            address as u64,
                                            &mut data[..size],
                                            &vcpu,
                                        );
                                        // Setting data for mmio can not fail.
                                        let _ = vcpu.set_data(&data[..size]);
                                    }
                                    VcpuExit::MmioWrite {
                                        address,
                                        size,
                                        data,
                                    } => {
                                        vcpu_plugin.mmio_write(
                                            address as u64,
                                            &data[..size],
                                            &vcpu,
                                        );
                                    }
                                    VcpuExit::Hlt => break,
                                    VcpuExit::Shutdown => break,
                                    VcpuExit::InternalError => {
                                        error!("vcpu {} has internal error", cpu_id);
                                        break;
                                    }
                                    // Any other exit reason is logged and the vcpu keeps running.
                                    r => warn!("unexpected vcpu exit: {:?}", r),
                                },
                                Err(e) => match e.errno() {
                                    // EINTR is remembered so the pending signal can be cleared
                                    // below; EAGAIN simply re-enters the loop.
                                    EINTR => interrupted_by_signal = true,
                                    EAGAIN => {}
                                    _ => {
                                        error!("vcpu hit unknown error: {}", e);
                                        break;
                                    }
                                },
                            }
                            // Checked after every exit so a kill request stops the vcpu promptly.
                            if kill_signaled.load(Ordering::SeqCst) {
                                break;
                            }

                            // Try to clear the signal that we use to kick VCPU if it is
                            // pending before attempting to handle pause requests.
                            if interrupted_by_signal {
                                clear_signal(SIGRTMIN() + 0)
                                    .expect("failed to clear pending signal");
                            }

                            if let Err(e) = vcpu_plugin.pre_run(&vcpu) {
                                error!("failed to process pause on vcpu {}: {}", cpu_id, e);
                                break;
                            }
                        }
                    }
                    // Reached on init failure as well as after leaving the run loop.
                    vcpu_exit_evt
                        .write(1)
                        .expect("failed to signal vcpu exit eventfd");
                })
                .map_err(Error::SpawnVcpu)?,
        );
    }
    Ok(())
}
503 
/// Identifies which source woke up the poll loop in `run_config`.
#[derive(PollToken)]
enum Token {
    /// Registered for the exit `EventFd`.
    Exit,
    /// Registered for the `SIGCHLD` signal fd.
    ChildSignal,
    /// Registered for a plugin socket; `index` is its position in `plugin.sockets()`.
    Plugin { index: usize },
}
510 
511 /// Run a VM with a plugin process specified by `cfg`.
512 ///
513 /// Not every field of `cfg` will be used. In particular, most field that pertain to a specific
514 /// device are ignored because the plugin is responsible for emulating hardware.
run_config(cfg: Config) -> Result<()>515 pub fn run_config(cfg: Config) -> Result<()> {
516     info!("crosvm starting plugin process");
517 
518     // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
519     // before any jailed devices have been spawned, so that we can catch any of them that fail very
520     // quickly.
521     let sigchld_fd = SignalFd::new(SIGCHLD).map_err(Error::CreateSignalFd)?;
522 
523     let jail = if cfg.sandbox {
524         // An empty directory for jailed plugin pivot root.
525         let root_path = match &cfg.plugin_root {
526             Some(dir) => dir,
527             None => Path::new("/var/empty"),
528         };
529 
530         if root_path.is_relative() {
531             return Err(Error::RootNotAbsolute);
532         }
533 
534         if !root_path.exists() {
535             return Err(Error::NoRootDir);
536         }
537 
538         if !root_path.is_dir() {
539             return Err(Error::RootNotDir);
540         }
541 
542         let policy_path = cfg.seccomp_policy_dir.join("plugin.policy");
543         let mut jail = create_plugin_jail(root_path, &policy_path)?;
544 
545         // Update gid map of the jail if caller provided supplemental groups.
546         if !cfg.plugin_gid_maps.is_empty() {
547             let map = format!("0 {} 1", getegid())
548                 + &cfg
549                     .plugin_gid_maps
550                     .into_iter()
551                     .map(|m| format!(",{} {} {}", m.inner, m.outer, m.count))
552                     .collect::<String>();
553             jail.gidmap(&map).map_err(Error::SetGidMap)?;
554         }
555 
556         // Mount minimal set of devices (full, zero, urandom, etc). We can not use
557         // jail.mount_dev() here because crosvm may not be running with CAP_SYS_ADMIN.
558         let device_names = ["full", "null", "urandom", "zero"];
559         for name in &device_names {
560             let device = Path::new("/dev").join(&name);
561             jail.mount_bind(&device, &device, true)
562                 .map_err(Error::MountDev)?;
563         }
564 
565         for bind_mount in &cfg.plugin_mounts {
566             jail.mount_bind(&bind_mount.src, &bind_mount.dst, bind_mount.writable)
567                 .map_err(Error::Mount)?;
568         }
569 
570         Some(jail)
571     } else {
572         None
573     };
574 
575     let mut tap_interfaces: Vec<Tap> = Vec::new();
576     if let Some(host_ip) = cfg.host_ip {
577         if let Some(netmask) = cfg.netmask {
578             if let Some(mac_address) = cfg.mac_address {
579                 let tap = Tap::new(false).map_err(Error::TapOpen)?;
580                 tap.set_ip_addr(host_ip).map_err(Error::TapSetIp)?;
581                 tap.set_netmask(netmask).map_err(Error::TapSetNetmask)?;
582                 tap.set_mac_address(mac_address)
583                     .map_err(Error::TapSetMacAddress)?;
584 
585                 tap.enable().map_err(Error::TapEnable)?;
586                 tap_interfaces.push(tap);
587             }
588         }
589     }
590     for tap_fd in cfg.tap_fd {
591         // Safe because we ensure that we get a unique handle to the fd.
592         let tap = unsafe {
593             Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateTapFd)?)
594                 .map_err(Error::CreateTapFd)?
595         };
596         tap_interfaces.push(tap);
597     }
598 
599     let plugin_args: Vec<&str> = cfg.params.iter().map(|s| &s[..]).collect();
600 
601     let plugin_path = match cfg.executable_path {
602         Some(Executable::Plugin(ref plugin_path)) => plugin_path.as_path(),
603         _ => panic!("Executable was not a plugin"),
604     };
605     let vcpu_count = cfg.vcpu_count.unwrap_or(1);
606     let mem = GuestMemory::new(&[]).unwrap();
607     let kvm = Kvm::new().map_err(Error::CreateKvm)?;
608     let mut vm = Vm::new(&kvm, mem).map_err(Error::CreateVm)?;
609     vm.create_irq_chip().map_err(Error::CreateIrqChip)?;
610     vm.create_pit().map_err(Error::CreatePIT)?;
611 
612     let mut plugin = Process::new(vcpu_count, plugin_path, &plugin_args, jail)?;
613     // Now that the jail for the plugin has been created and we had a chance to adjust gids there,
614     // we can drop all our capabilities in case we had any.
615     drop_capabilities().map_err(Error::DropCapabilities)?;
616 
617     let mut res = Ok(());
618     // If Some, we will exit after enough time is passed to shutdown cleanly.
619     let mut dying_instant: Option<Instant> = None;
620     let duration_to_die = Duration::from_millis(1000);
621 
622     let exit_evt = EventFd::new().map_err(Error::CreateEventFd)?;
623     let kill_signaled = Arc::new(AtomicBool::new(false));
624     let mut vcpu_handles = Vec::with_capacity(vcpu_count as usize);
625 
626     let poll_ctx = PollContext::new().map_err(Error::CreatePollContext)?;
627     poll_ctx
628         .add(&exit_evt, Token::Exit)
629         .map_err(Error::PollContextAdd)?;
630     poll_ctx
631         .add(&sigchld_fd, Token::ChildSignal)
632         .map_err(Error::PollContextAdd)?;
633 
634     let mut sockets_to_drop = Vec::new();
635     let mut redo_poll_ctx_sockets = true;
636     // In this loop, make every attempt to not return early. If an error is encountered, set `res`
637     // to the error, set `dying_instant` to now, and signal the plugin that it will be killed soon.
638     // If the plugin cannot be signaled because it is dead of `signal_kill` failed, simply break
639     // from the poll loop so that the VCPU threads can be cleaned up.
640     'poll: loop {
641         // After we have waited long enough, it's time to give up and exit.
642         if dying_instant
643             .map(|i| i.elapsed() >= duration_to_die)
644             .unwrap_or(false)
645         {
646             break;
647         }
648 
649         if redo_poll_ctx_sockets {
650             for (index, socket) in plugin.sockets().iter().enumerate() {
651                 poll_ctx
652                     .add(socket, Token::Plugin { index })
653                     .map_err(Error::PollContextAdd)?;
654             }
655         }
656 
657         let plugin_socket_count = plugin.sockets().len();
658         let events = {
659             let poll_res = match dying_instant {
660                 Some(inst) => poll_ctx.wait_timeout(duration_to_die - inst.elapsed()),
661                 None => poll_ctx.wait(),
662             };
663             match poll_res {
664                 Ok(v) => v,
665                 Err(e) => {
666                     // Polling no longer works, time to break and cleanup,
667                     if res.is_ok() {
668                         res = Err(Error::Poll(e));
669                     }
670                     break;
671                 }
672             }
673         };
674         for event in events.iter_readable() {
675             match event.token() {
676                 Token::Exit => {
677                     // No need to check the exit event if we are already doing cleanup.
678                     let _ = poll_ctx.delete(&exit_evt);
679                     dying_instant.get_or_insert(Instant::now());
680                     let sig_res = plugin.signal_kill();
681                     if res.is_ok() && sig_res.is_err() {
682                         res = sig_res.map_err(Error::PluginKill);
683                     }
684                 }
685                 Token::ChildSignal => {
686                     // Print all available siginfo structs, then exit the loop.
687                     loop {
688                         match sigchld_fd.read() {
689                             Ok(Some(siginfo)) => {
690                                 // If the plugin process has ended, there is no need to continue
691                                 // processing plugin connections, so we break early.
692                                 if siginfo.ssi_pid == plugin.pid() as u32 {
693                                     break 'poll;
694                                 }
695                                 // Because SIGCHLD is not expected from anything other than the
696                                 // plugin process, report it as an error.
697                                 if res.is_ok() {
698                                     res = Err(Error::SigChild {
699                                         pid: siginfo.ssi_pid,
700                                         signo: siginfo.ssi_signo,
701                                         status: siginfo.ssi_status,
702                                         code: siginfo.ssi_code,
703                                     })
704                                 }
705                             }
706                             Ok(None) => break, // No more signals to read.
707                             Err(e) => {
708                                 // Something really must be messed up for this to happen, continue
709                                 // processing connections for a limited time.
710                                 if res.is_ok() {
711                                     res = Err(Error::SignalFd(e));
712                                 }
713                                 break;
714                             }
715                         }
716                     }
717                     // As we only spawn the plugin process, getting a SIGCHLD can only mean
718                     // something went wrong.
719                     dying_instant.get_or_insert(Instant::now());
720                     let sig_res = plugin.signal_kill();
721                     if res.is_ok() && sig_res.is_err() {
722                         res = sig_res.map_err(Error::PluginKill);
723                     }
724                 }
725                 Token::Plugin { index } => {
726                     match plugin.handle_socket(index, &kvm, &mut vm, &vcpu_handles, &tap_interfaces)
727                     {
728                         Ok(_) => {}
729                         // A HUP is an expected event for a socket, so don't bother warning about
730                         // it.
731                         Err(Error::PluginSocketHup) => sockets_to_drop.push(index),
732                         // Only one connection out of potentially many is broken. Drop it, but don't
733                         // start cleaning up. Because the error isn't returned, we will warn about
734                         // it here.
735                         Err(e) => {
736                             warn!("error handling plugin socket: {}", e);
737                             sockets_to_drop.push(index);
738                         }
739                     }
740                 }
741             }
742         }
743 
744         if vcpu_handles.is_empty() && dying_instant.is_none() && plugin.is_started() {
745             let res = run_vcpus(
746                 &kvm,
747                 &vm,
748                 &plugin,
749                 vcpu_count,
750                 &kill_signaled,
751                 &exit_evt,
752                 &mut vcpu_handles,
753             );
754             if let Err(e) = res {
755                 dying_instant.get_or_insert(Instant::now());
756                 error!("failed to start vcpus: {}", e);
757             }
758         }
759 
760         redo_poll_ctx_sockets =
761             !sockets_to_drop.is_empty() || plugin.sockets().len() != plugin_socket_count;
762 
763         // Clean up all of the sockets that we have determined were disconnected or suffered some
764         // other error.
765         plugin.drop_sockets(&mut sockets_to_drop);
766         sockets_to_drop.clear();
767 
768         if redo_poll_ctx_sockets {
769             for socket in plugin.sockets() {
770                 let _ = poll_ctx.delete(socket);
771             }
772         }
773     }
774 
775     // vcpu threads MUST see the kill signaled flag, otherwise they may re-enter the VM.
776     kill_signaled.store(true, Ordering::SeqCst);
777     // Depending on how we ended up here, the plugin process, or a VCPU thread waiting for requests
778     // might be stuck. The `signal_kill` call will unstick all the VCPU threads by closing their
779     // blocked connections.
780     plugin.signal_kill().map_err(Error::PluginKill)?;
781     for handle in vcpu_handles {
782         match handle.kill(SIGRTMIN() + 0) {
783             Ok(_) => {
784                 if let Err(e) = handle.join() {
785                     error!("failed to join vcpu thread: {:?}", e);
786                 }
787             }
788             Err(e) => error!("failed to kill vcpu thread: {}", e),
789         }
790     }
791 
792     match plugin.try_wait() {
793         // The plugin has run out of time by now
794         Ok(ProcessStatus::Running) => Err(Error::PluginTimeout),
795         // Return an error discovered earlier in this function.
796         Ok(ProcessStatus::Success) => res,
797         Ok(ProcessStatus::Fail(code)) => Err(Error::PluginFailed(code)),
798         Ok(ProcessStatus::Signal(code)) => Err(Error::PluginKilled(code)),
799         Err(e) => Err(Error::PluginWait(e)),
800     }
801 }
802