1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::cmp::{max, Reverse};
6 use std::collections::{BTreeMap, BTreeSet};
7 use std::convert::TryInto;
8 use std::fs::{File, OpenOptions};
9 use std::io::prelude::*;
10 use std::io::stdin;
11 use std::iter;
12 use std::mem;
13 use std::ops::RangeInclusive;
14 #[cfg(feature = "gpu")]
15 use std::os::unix::net::UnixStream;
16 use std::os::unix::prelude::OpenOptionsExt;
17 use std::path::Path;
18 use std::str::FromStr;
19 use std::sync::{mpsc, Arc, Barrier};
20 use std::time::Duration;
21 
22 use std::process;
23 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
24 use std::thread;
25 
26 use devices::virtio::vhost::vsock::{VhostVsockConfig, VhostVsockDeviceParameter};
27 use libc;
28 
29 use acpi_tables::sdt::SDT;
30 
31 use anyhow::{anyhow, bail, Context, Result};
32 use base::*;
33 use base::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
34 use devices::serial_device::SerialHardware;
35 use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
36 use devices::virtio::memory_mapper::MemoryMapperTrait;
37 #[cfg(feature = "gpu")]
38 use devices::virtio::{self, EventDevice};
39 #[cfg(feature = "audio")]
40 use devices::Ac97Dev;
41 use devices::{
42     self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqEventIndex, KvmKernelIrqChip, PciAddress,
43     PciDevice, PvPanicCode, PvPanicPciDevice, StubPciDevice, VirtioPciDevice,
44 };
45 use devices::{CoIommuDev, IommuDevType};
46 #[cfg(feature = "usb")]
47 use devices::{HostBackendDeviceProvider, XhciController};
48 use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
49 use hypervisor::{HypervisorCap, ProtectionType, Vm, VmCap};
50 use minijail::{self, Minijail};
51 use resources::{Alloc, SystemAllocator};
52 use rutabaga_gfx::RutabagaGralloc;
53 use sync::Mutex;
54 use vm_control::*;
55 use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
56 
57 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
58 use crate::gdb::{gdb_thread, GdbStub};
59 use crate::{Config, Executable, FileBackedMappingParameters, SharedDir, SharedDirKind, VfioType};
60 use arch::{
61     self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
62 };
63 
64 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
65 use {
66     crate::HostPcieRootPortParameters,
67     devices::{
68         IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip, PciBridge, PcieHostRootPort, PcieRootPort,
69     },
70     hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
71     x86_64::X8664arch as Arch,
72 };
73 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
74 use {
75     aarch64::AArch64 as Arch,
76     devices::IrqChipAArch64 as IrqChipArch,
77     hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
78 };
79 
80 mod device_helpers;
81 use device_helpers::*;
82 pub(crate) mod jail_helpers;
83 use jail_helpers::*;
84 mod vcpu;
85 
86 #[cfg(feature = "gpu")]
87 pub(crate) mod gpu;
88 #[cfg(feature = "gpu")]
89 pub use gpu::GpuRenderServerParameters;
90 #[cfg(feature = "gpu")]
91 use gpu::*;
92 
93 #[cfg(target_os = "android")]
94 mod android;
95 
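// Builds the virtio device stubs (console, block, net, input, balloon, GPU, vsock, fs, ...)
// described by `cfg`. Each stub pairs a device with an optional minijail; the caller wraps
// the stubs in virtio-pci transports.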
96 // gpu_device_tube is not used when GPU support is disabled.
97 #[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
98 fn create_virtio_devices(
99     cfg: &Config,
100     vm: &mut impl Vm,
101     resources: &mut SystemAllocator,
102     _exit_evt: &Event,
103     wayland_device_tube: Tube,
104     gpu_device_tube: Tube,
105     vhost_user_gpu_tubes: Vec<(Tube, Tube, Tube)>,
106     balloon_device_tube: Option<Tube>,
107     balloon_inflate_tube: Option<Tube>,
108     init_balloon_size: u64,
109     disk_device_tubes: &mut Vec<Tube>,
110     pmem_device_tubes: &mut Vec<Tube>,
111     map_request: Arc<Mutex<Option<ExternalMapping>>>,
112     fs_device_tubes: &mut Vec<Tube>,
113     #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
114     vvu_proxy_device_tubes: &mut Vec<Tube>,
115     vvu_proxy_max_sibling_mem_size: u64,
116 ) -> DeviceResult<Vec<VirtioDeviceStub>> {
117     let mut devs = Vec::new();
118 
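    // vhost_user_gpu_tubes entries are (host-side GPU tube, device-side GPU tube, device-side
    // control tube), one triple per cfg.vhost_user_gpu entry.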
119     #[cfg(feature = "gpu")]
120     for (opt, (host_gpu_tube, device_gpu_tube, device_control_tube)) in
121         cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes)
122     {
123         devs.push(create_vhost_user_gpu_device(
124             cfg,
125             opt,
126             (host_gpu_tube, device_gpu_tube),
127             device_control_tube,
128         )?);
129     }
130 
131     for opt in &cfg.vvu_proxy {
132         devs.push(create_vvu_proxy_device(
133             cfg,
134             opt,
135             vvu_proxy_device_tubes.remove(0),
136             vvu_proxy_max_sibling_mem_size,
137         )?);
138     }
139 
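    // Resource bridges are tube endpoints handed to the GPU device; the other ends go to the
    // Wayland and video devices so they can share virtio-gpu resources.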
140     #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
141     let mut resource_bridges = Vec::<Tube>::new();
142 
143     if !cfg.wayland_socket_paths.is_empty() {
144         #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
145         let mut wl_resource_bridge = None::<Tube>;
146 
147         #[cfg(feature = "gpu")]
148         {
149             if cfg.gpu_parameters.is_some() {
150                 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
151                 resource_bridges.push(gpu_socket);
152                 wl_resource_bridge = Some(wl_socket);
153             }
154         }
155 
156         devs.push(create_wayland_device(
157             cfg,
158             wayland_device_tube,
159             wl_resource_bridge,
160         )?);
161     }
162 
163     #[cfg(feature = "video-decoder")]
164     let video_dec_cfg = if let Some(backend) = cfg.video_dec {
165         let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
166         resource_bridges.push(gpu_tube);
167         Some((video_tube, backend))
168     } else {
169         None
170     };
171 
172     #[cfg(feature = "video-encoder")]
173     let video_enc_cfg = if let Some(backend) = cfg.video_enc {
174         let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
175         resource_bridges.push(gpu_tube);
176         Some((video_tube, backend))
177     } else {
178         None
179     };
180 
181     #[cfg(feature = "gpu")]
182     {
183         if let Some(gpu_parameters) = &cfg.gpu_parameters {
184             let mut gpu_display_w = virtio::DEFAULT_DISPLAY_WIDTH;
185             let mut gpu_display_h = virtio::DEFAULT_DISPLAY_HEIGHT;
186             if !gpu_parameters.displays.is_empty() {
187                 gpu_display_w = gpu_parameters.displays[0].width;
188                 gpu_display_h = gpu_parameters.displays[0].height;
189             }
190 
191             let mut event_devices = Vec::new();
192             if cfg.display_window_mouse {
193                 let (event_device_socket, virtio_dev_socket) =
194                     UnixStream::pair().context("failed to create socket")?;
195                 let (multi_touch_width, multi_touch_height) = cfg
196                     .virtio_multi_touch
197                     .first()
198                     .as_ref()
199                     .map(|multi_touch_spec| multi_touch_spec.get_size())
200                     .unwrap_or((gpu_display_w, gpu_display_h));
201                 let dev = virtio::new_multi_touch(
202                     // u32::MAX is the least likely to collide with the indices generated above for
203                     // the multi_touch options, which begin at 0.
204                     u32::MAX,
205                     virtio_dev_socket,
206                     multi_touch_width,
207                     multi_touch_height,
208                     virtio::base_features(cfg.protected_vm),
209                 )
210                 .context("failed to set up mouse device")?;
211                 devs.push(VirtioDeviceStub {
212                     dev: Box::new(dev),
213                     jail: simple_jail(&cfg.jail_config, "input_device")?,
214                 });
215                 event_devices.push(EventDevice::touchscreen(event_device_socket));
216             }
217             if cfg.display_window_keyboard {
218                 let (event_device_socket, virtio_dev_socket) =
219                     UnixStream::pair().context("failed to create socket")?;
220                 let dev = virtio::new_keyboard(
221                     // u32::MAX is the least likely to collide with the indices generated for
222                     // the keyboard options, which begin at 0.
223                     u32::MAX,
224                     virtio_dev_socket,
225                     virtio::base_features(cfg.protected_vm),
226                 )
227                 .context("failed to set up keyboard device")?;
228                 devs.push(VirtioDeviceStub {
229                     dev: Box::new(dev),
230                     jail: simple_jail(&cfg.jail_config, "input_device")?,
231                 });
232                 event_devices.push(EventDevice::keyboard(event_device_socket));
233             }
234 
235             devs.push(create_gpu_device(
236                 cfg,
237                 _exit_evt,
238                 gpu_device_tube,
239                 resource_bridges,
240                 // Use the unnamed socket for GPU display screens.
241                 cfg.wayland_socket_paths.get(""),
242                 cfg.x_display.clone(),
243                 render_server_fd,
244                 event_devices,
245                 map_request,
246             )?);
247         }
248     }
249 
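    // Create a virtio-console device for every serial parameter that targets virtio-console
    // hardware.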
250     for (_, param) in cfg
251         .serial_parameters
252         .iter()
253         .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
254     {
255         let dev = create_console_device(cfg, param)?;
256         devs.push(dev);
257     }
258 
259     for disk in &cfg.disks {
260         let disk_device_tube = disk_device_tubes.remove(0);
261         devs.push(create_block_device(cfg, disk, disk_device_tube)?);
262     }
263 
264     for blk in &cfg.vhost_user_blk {
265         devs.push(create_vhost_user_block_device(cfg, blk)?);
266     }
267 
268     for console in &cfg.vhost_user_console {
269         devs.push(create_vhost_user_console_device(cfg, console)?);
270     }
271 
272     for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
273         let pmem_device_tube = pmem_device_tubes.remove(0);
274         devs.push(create_pmem_device(
275             cfg,
276             vm,
277             resources,
278             pmem_disk,
279             index,
280             pmem_device_tube,
281         )?);
282     }
283 
284     if cfg.rng {
285         devs.push(create_rng_device(cfg)?);
286     }
287 
288     #[cfg(feature = "tpm")]
289     {
290         if cfg.software_tpm {
291             devs.push(create_software_tpm_device(cfg)?);
292         }
293     }
294 
295     for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
296         devs.push(create_single_touch_device(
297             cfg,
298             single_touch_spec,
299             idx as u32,
300         )?);
301     }
302 
303     for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
304         devs.push(create_multi_touch_device(
305             cfg,
306             multi_touch_spec,
307             idx as u32,
308         )?);
309     }
310 
311     for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
312         devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
313     }
314 
315     for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
316         devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
317     }
318 
319     for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
320         devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
321     }
322 
323     for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
324         devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
325     }
326 
327     for dev_path in &cfg.virtio_input_evdevs {
328         devs.push(create_vinput_device(cfg, dev_path)?);
329     }
330 
331     if let Some(balloon_device_tube) = balloon_device_tube {
332         devs.push(create_balloon_device(
333             cfg,
334             balloon_device_tube,
335             balloon_inflate_tube,
336             init_balloon_size,
337         )?);
338     }
339 
340     // We checked above that if the IP is defined, then the netmask is, too.
341     for tap_fd in &cfg.tap_fd {
342         devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
343     }
344 
345     if let (Some(host_ip), Some(netmask), Some(mac_address)) =
346         (cfg.host_ip, cfg.netmask, cfg.mac_address)
347     {
348         if !cfg.vhost_user_net.is_empty() {
349             bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
350         }
351         devs.push(create_net_device_from_config(
352             cfg,
353             host_ip,
354             netmask,
355             mac_address,
356         )?);
357     }
358 
359     for tap_name in &cfg.tap_name {
360         devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
361     }
362 
363     for net in &cfg.vhost_user_net {
364         devs.push(create_vhost_user_net_device(cfg, net)?);
365     }
366 
367     for vsock in &cfg.vhost_user_vsock {
368         devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
369     }
370 
371     for opt in &cfg.vhost_user_wl {
372         devs.push(create_vhost_user_wl_device(cfg, opt)?);
373     }
374 
375     #[cfg(feature = "audio_cras")]
376     {
377         for cras_snd in &cfg.cras_snds {
378             devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
379         }
380     }
381 
382     #[cfg(feature = "video-decoder")]
383     {
384         if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
385             register_video_device(
386                 video_dec_backend,
387                 &mut devs,
388                 video_dec_tube,
389                 cfg,
390                 devices::virtio::VideoDeviceType::Decoder,
391             )?;
392         }
393     }
394 
395     #[cfg(feature = "video-encoder")]
396     {
397         if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
398             register_video_device(
399                 video_enc_backend,
400                 &mut devs,
401                 video_enc_tube,
402                 cfg,
403                 devices::virtio::VideoDeviceType::Encoder,
404             )?;
405         }
406     }
407 
408     if let Some(cid) = cfg.cid {
409         let vhost_config = VhostVsockConfig {
410             device: cfg
411                 .vhost_vsock_device
412                 .clone()
413                 .unwrap_or(VhostVsockDeviceParameter::default()),
414             cid,
415         };
416         devs.push(create_vhost_vsock_device(cfg, &vhost_config)?);
417     }
418 
419     for vhost_user_fs in &cfg.vhost_user_fs {
420         devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
421     }
422 
423     #[cfg(feature = "audio")]
424     for vhost_user_snd in &cfg.vhost_user_snd {
425         devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
426     }
427 
428     for shared_dir in &cfg.shared_dirs {
429         let SharedDir {
430             src,
431             tag,
432             kind,
433             uid_map,
434             gid_map,
435             fs_cfg,
436             p9_cfg,
437         } = shared_dir;
438 
439         let dev = match kind {
440             SharedDirKind::FS => {
441                 let device_tube = fs_device_tubes.remove(0);
442                 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
443             }
444             SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
445         };
446         devs.push(dev);
447     }
448 
449     if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
450         devs.push(create_vhost_user_mac80211_hwsim_device(
451             cfg,
452             vhost_user_mac80211_hwsim,
453         )?);
454     }
455 
456     #[cfg(feature = "audio")]
457     if let Some(path) = &cfg.sound {
458         devs.push(create_sound_device(path, cfg)?);
459     }
460 
461     Ok(devs)
462 }
463 
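// Creates every bus device for the VM: VFIO passthrough devices, an optional CoIOMMU device,
// all virtio devices from create_virtio_devices() wrapped in virtio-pci transports, and
// (feature-gated) AC'97 audio, the xHCI USB controller, stub PCI devices, and pvpanic.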
464 fn create_devices(
465     cfg: &Config,
466     vm: &mut impl Vm,
467     resources: &mut SystemAllocator,
468     exit_evt: &Event,
469     panic_wrtube: Tube,
470     iommu_attached_endpoints: &mut BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
471     control_tubes: &mut Vec<TaggedControlTube>,
472     wayland_device_tube: Tube,
473     gpu_device_tube: Tube,
474     // Tuple content: (host-side GPU tube, device-side GPU tube, device-side control tube).
475     vhost_user_gpu_tubes: Vec<(Tube, Tube, Tube)>,
476     balloon_device_tube: Option<Tube>,
477     init_balloon_size: u64,
478     disk_device_tubes: &mut Vec<Tube>,
479     pmem_device_tubes: &mut Vec<Tube>,
480     fs_device_tubes: &mut Vec<Tube>,
481     #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
482     map_request: Arc<Mutex<Option<ExternalMapping>>>,
483     #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
484     vvu_proxy_device_tubes: &mut Vec<Tube>,
485     vvu_proxy_max_sibling_mem_size: u64,
486 ) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
487     let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
488     let mut balloon_inflate_tube: Option<Tube> = None;
489     if !cfg.vfio.is_empty() {
490         let mut coiommu_attached_endpoints = Vec::new();
491 
492         for vfio_dev in cfg
493             .vfio
494             .iter()
495             .filter(|dev| dev.get_type() == VfioType::Pci)
496         {
497             let vfio_path = &vfio_dev.vfio_path;
498             let (vfio_pci_device, jail) = create_vfio_device(
499                 cfg,
500                 vm,
501                 resources,
502                 control_tubes,
503                 vfio_path.as_path(),
504                 None,
505                 vfio_dev.guest_address(),
506                 iommu_attached_endpoints,
507                 Some(&mut coiommu_attached_endpoints),
508                 vfio_dev.iommu_dev_type(),
509             )?;
510 
511             devices.push((vfio_pci_device, jail));
512         }
513 
514         for vfio_dev in cfg
515             .vfio
516             .iter()
517             .filter(|dev| dev.get_type() == VfioType::Platform)
518         {
519             let vfio_path = &vfio_dev.vfio_path;
520             let (vfio_plat_dev, jail) = create_vfio_platform_device(
521                 cfg,
522                 vm,
523                 resources,
524                 control_tubes,
525                 vfio_path.as_path(),
526                 iommu_attached_endpoints,
527                 IommuDevType::NoIommu, // Virtio IOMMU is not supported yet
528             )?;
529 
530             devices.push((Box::new(vfio_plat_dev), jail));
531         }
532 
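        // VFIO and CoIOMMU endpoints may pin guest memory, so raise the RLIMIT_MEMLOCK soft
        // limit by the guest memory size if the current limit would be too low.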
533         if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
534             let mut buf = mem::MaybeUninit::<libc::rlimit>::zeroed();
535             let res = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
536             if res == 0 {
537                 let limit = unsafe { buf.assume_init() };
538                 let rlim_new = limit
539                     .rlim_cur
540                     .saturating_add(vm.get_memory().memory_size() as libc::rlim_t);
541                 let rlim_max = max(limit.rlim_max, rlim_new);
542                 if limit.rlim_cur < rlim_new {
543                     let limit_arg = libc::rlimit {
544                         rlim_cur: rlim_new as libc::rlim_t,
545                         rlim_max: rlim_max as libc::rlim_t,
546                     };
547                     let res = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &limit_arg) };
548                     if res != 0 {
549                         bail!("Set rlimit failed");
550                     }
551                 }
552             } else {
553                 bail!("Get rlimit failed");
554             }
555         }
556 
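        // All CoIOMMU endpoints share a single CoIommuDev. One tube pair links it to VM memory
        // control; a second pair links it to the balloon device (the balloon end becomes
        // balloon_inflate_tube).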
557         if !coiommu_attached_endpoints.is_empty() {
558             let vfio_container =
559                 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
560                     .context("failed to get vfio container")?;
561             let (coiommu_host_tube, coiommu_device_tube) =
562                 Tube::pair().context("failed to create coiommu tube")?;
563             control_tubes.push(TaggedControlTube::VmMemory(coiommu_host_tube));
564             let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
565             let (coiommu_tube, balloon_tube) =
566                 Tube::pair().context("failed to create coiommu tube")?;
567             balloon_inflate_tube = Some(balloon_tube);
568             let dev = CoIommuDev::new(
569                 vm.get_memory().clone(),
570                 vfio_container,
571                 coiommu_device_tube,
572                 coiommu_tube,
573                 coiommu_attached_endpoints,
574                 vcpu_count,
575                 cfg.coiommu_param.unwrap_or_default(),
576             )
577             .context("failed to create coiommu device")?;
578 
579             devices.push((Box::new(dev), simple_jail(&cfg.jail_config, "coiommu")?));
580         }
581     }
582 
583     let stubs = create_virtio_devices(
584         cfg,
585         vm,
586         resources,
587         exit_evt,
588         wayland_device_tube,
589         gpu_device_tube,
590         vhost_user_gpu_tubes,
591         balloon_device_tube,
592         balloon_inflate_tube,
593         init_balloon_size,
594         disk_device_tubes,
595         pmem_device_tubes,
596         map_request,
597         fs_device_tubes,
598         #[cfg(feature = "gpu")]
599         render_server_fd,
600         vvu_proxy_device_tubes,
601         vvu_proxy_max_sibling_mem_size,
602     )?;
603 
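    // Wrap each virtio device stub in a virtio-pci transport; each one gets an MSI tube whose
    // host end is registered with the main control loop.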
604     for stub in stubs {
605         let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
606         control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
607         let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
608             .context("failed to create virtio pci dev")?;
609         let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
610         devices.push((dev, stub.jail));
611     }
612 
613     #[cfg(feature = "audio")]
614     for ac97_param in &cfg.ac97_parameters {
615         let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
616             .context("failed to create ac97 device")?;
617         let jail = simple_jail(&cfg.jail_config, dev.minijail_policy())?;
618         devices.push((Box::new(dev), jail));
619     }
620 
621     #[cfg(feature = "usb")]
622     if cfg.usb {
623         // Create xhci controller.
624         let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
625         devices.push((usb_controller, simple_jail(&cfg.jail_config, "xhci")?));
626     }
627 
628     for params in &cfg.stub_pci_devices {
629         // Stub devices don't need jailing since they don't do anything.
630         devices.push((Box::new(StubPciDevice::new(params)), None));
631     }
632 
633     devices.push((Box::new(PvPanicPciDevice::new(panic_wrtube)), None));
634     Ok(devices)
635 }
636 
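// Maps each file-backed mapping from the config into guest physical address space, reserving
// the target range with the MMIO allocator when it falls inside an MMIO region.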
637 fn create_file_backed_mappings(
638     cfg: &Config,
639     vm: &mut impl Vm,
640     resources: &mut SystemAllocator,
641 ) -> Result<()> {
642     for mapping in &cfg.file_backed_mappings {
643         let file = OpenOptions::new()
644             .read(true)
645             .write(mapping.writable)
646             .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
647             .open(&mapping.path)
648             .context("failed to open file for file-backed mapping")?;
649         let prot = if mapping.writable {
650             Protection::read_write()
651         } else {
652             Protection::read()
653         };
654         let size = mapping
655             .size
656             .try_into()
657             .context("Invalid size for file-backed mapping")?;
658         let memory_mapping = MemoryMappingBuilder::new(size)
659             .from_file(&file)
660             .offset(mapping.offset)
661             .protection(prot)
662             .build()
663             .context("failed to map backing file for file-backed mapping")?;
664 
665         match resources.mmio_allocator_any().allocate_at(
666             mapping.address,
667             mapping.size,
668             Alloc::FileBacked(mapping.address),
669             "file-backed mapping".to_owned(),
670         ) {
671             // OutOfSpace just means that this mapping is not in the MMIO regions at all, so don't
672             // consider it an error.
673             // TODO(b/222769529): Reserve this region in a global memory address space allocator once
674             // we have that so nothing else can accidentally overlap with it.
675             Ok(()) | Err(resources::Error::OutOfSpace) => {}
676             e => e.context("failed to allocate guest address for file-backed mapping")?,
677         }
678 
679         vm.add_memory_region(
680             GuestAddress(mapping.address),
681             Box::new(memory_mapping),
682             !mapping.writable,
683             /* log_dirty_pages = */ false,
684         )
685         .context("failed to configure file-backed mapping")?;
686     }
687 
688     Ok(())
689 }
690 
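// Creates virtual PCIe root ports. With no host root ports configured, one root port is created
// per occupied non-root bus plus a dedicated hot-plug port; otherwise one virtual root port is
// created per host root port, mirroring its bus range.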
691 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
692 fn create_pcie_root_port(
693     host_pcie_rp: Vec<HostPcieRootPortParameters>,
694     sys_allocator: &mut SystemAllocator,
695     control_tubes: &mut Vec<TaggedControlTube>,
696     devices: &mut Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
697     hp_vec: &mut Vec<Arc<Mutex<dyn HotPlugBus>>>,
698     hp_endpoints_ranges: &mut Vec<RangeInclusive<u32>>,
699     // TODO(b/228627457): clippy is incorrectly warning about this Vec, which needs to be a Vec so
700     // we can push into it
701     #[allow(clippy::ptr_arg)] gpe_notify_devs: &mut Vec<(u32, Arc<Mutex<dyn GpeNotify>>)>,
702 ) -> Result<()> {
703     if host_pcie_rp.is_empty() {
704         // The user didn't specify host PCIe root ports to link to virtual root ports, so
705         // find an empty bus and create a purely virtual PCIe root port.
706         let mut hp_sec_bus = 0u8;
707         // Create a PCIe root port for each occupied non-root bus; every device on such a
708         // bus will be connected behind a virtual PCIe root port.
709         for i in 1..255 {
710             if sys_allocator.pci_bus_empty(i) {
711                 if hp_sec_bus == 0 {
712                     hp_sec_bus = i;
713                 }
714                 continue;
715             }
716             let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(i, false)));
717             let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
718             control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
719             let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
720             // no ipc is used if the root port disables hotplug
721             devices.push((pci_bridge, None));
722         }
723 
724         // Create Pcie Root Port for hot-plug
725         if hp_sec_bus == 0 {
726             return Err(anyhow!("no more addresses are available"));
727         }
728         let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(hp_sec_bus, true)));
729         let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
730         control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
731         let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
732 
733         hp_endpoints_ranges.push(RangeInclusive::new(
734             PciAddress {
735                 bus: pci_bridge.get_secondary_num(),
736                 dev: 0,
737                 func: 0,
738             }
739             .to_u32(),
740             PciAddress {
741                 bus: pci_bridge.get_subordinate_num(),
742                 dev: 32,
743                 func: 8,
744             }
745             .to_u32(),
746         ));
747 
748         devices.push((pci_bridge, None));
749         hp_vec.push(pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
750     } else {
751         // The user specified host PCIe root ports to link to virtual root ports: reserve
752         // each host PCI BDF and create a virtual PCIe root port mirroring the host's attributes.
753         for host_pcie in host_pcie_rp.iter() {
754             let (vm_host_tube, vm_device_tube) = Tube::pair().context("failed to create tube")?;
755             let pcie_host = PcieHostRootPort::new(host_pcie.host_path.as_path(), vm_device_tube)?;
756             let bus_range = pcie_host.get_bus_range();
757             let mut slot_implemented = true;
758             for i in bus_range.secondary..=bus_range.subordinate {
759                 // If this bus is occupied by a vfio-pci device, that device is statically
760                 // connected to a PCI bridge on the host, so it should also be statically
761                 // connected to a virtual PCI bridge in the guest; that bridge won't have
762                 // hotplug capability and won't use a slot.
763                 if !sys_allocator.pci_bus_empty(i) {
764                     slot_implemented = false;
765                     break;
766                 }
767             }
768 
769             let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new_from_host(
770                 pcie_host,
771                 slot_implemented,
772             )?));
773             control_tubes.push(TaggedControlTube::Vm(vm_host_tube));
774 
775             let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
776             control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
777             let mut pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
778             // early reservation for host pcie root port devices.
779             let rootport_addr = pci_bridge.allocate_address(sys_allocator);
780             if rootport_addr.is_err() {
781                 warn!(
782                     "address reservation failed for host pcie root port {}",
783                     pci_bridge.debug_label()
784                 );
785             }
786 
787             // Only append the sub pci range of a hot-pluggable root port to virtio-iommu
788             if slot_implemented {
789                 hp_endpoints_ranges.push(RangeInclusive::new(
790                     PciAddress {
791                         bus: pci_bridge.get_secondary_num(),
792                         dev: 0,
793                         func: 0,
794                     }
795                     .to_u32(),
796                     PciAddress {
797                         bus: pci_bridge.get_subordinate_num(),
798                         dev: 32,
799                         func: 8,
800                     }
801                     .to_u32(),
802                 ));
803             }
804 
805             devices.push((pci_bridge, None));
806             if slot_implemented {
807                 if let Some(gpe) = host_pcie.hp_gpe {
808                     gpe_notify_devs
809                         .push((gpe, pcie_root_port.clone() as Arc<Mutex<dyn GpeNotify>>));
810                 }
811                 hp_vec.push(pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
812             }
813         }
814     }
815 
816     Ok(())
817 }
818 
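// Gathers the kernel/BIOS/initrd images, memory and CPU configuration, and ACPI tables from
// `cfg` into the VmComponents consumed by the arch-specific VM builder.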
819 fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
820     let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
821         Some(
822             open_file(initrd_path, OpenOptions::new().read(true))
823                 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
824         )
825     } else {
826         None
827     };
828 
829     let vm_image = match cfg.executable_path {
830         Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
831             open_file(kernel_path, OpenOptions::new().read(true)).with_context(|| {
832                 format!("failed to open kernel image {}", kernel_path.display())
833             })?,
834         ),
835         Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
836             open_file(bios_path, OpenOptions::new().read(true))
837                 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
838         ),
839         _ => panic!("Did not receive a bios or kernel, should be impossible."),
840     };
841 
842     let swiotlb = if let Some(size) = cfg.swiotlb {
843         Some(
844             size.checked_mul(1024 * 1024)
845                 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
846         )
847     } else {
848         match cfg.protected_vm {
849             ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
850                 Some(64 * 1024 * 1024)
851             }
852             ProtectionType::Unprotected => None,
853         }
854     };
855 
856     Ok(VmComponents {
857         memory_size: cfg
858             .memory
859             .unwrap_or(256)
860             .checked_mul(1024 * 1024)
861             .ok_or_else(|| anyhow!("requested memory size too large"))?,
862         swiotlb,
863         vcpu_count: cfg.vcpu_count.unwrap_or(1),
864         vcpu_affinity: cfg.vcpu_affinity.clone(),
865         cpu_clusters: cfg.cpu_clusters.clone(),
866         cpu_capacity: cfg.cpu_capacity.clone(),
867         #[cfg(feature = "direct")]
868         direct_gpe: cfg.direct_gpe.clone(),
869         no_smt: cfg.no_smt,
870         hugepages: cfg.hugepages,
871         vm_image,
872         android_fstab: cfg
873             .android_fstab
874             .as_ref()
875             .map(|x| {
876                 File::open(x)
877                     .with_context(|| format!("failed to open android fstab file {}", x.display()))
878             })
879             .map_or(Ok(None), |v| v.map(Some))?,
880         pstore: cfg.pstore.clone(),
881         initrd_image,
882         extra_kernel_params: cfg.params.clone(),
883         acpi_sdts: cfg
884             .acpi_tables
885             .iter()
886             .map(|path| {
887                 SDT::from_file(path)
888                     .with_context(|| format!("failed to open ACPI file {}", path.display()))
889             })
890             .collect::<Result<Vec<SDT>>>()?,
891         rt_cpus: cfg.rt_cpus.clone(),
892         delay_rt: cfg.delay_rt,
893         protected_vm: cfg.protected_vm,
894         #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
895         gdb: None,
896         dmi_path: cfg.dmi_path.clone(),
897         no_legacy: cfg.no_legacy,
898         host_cpu_topology: cfg.host_cpu_topology,
899         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
900         force_s2idle: cfg.force_s2idle,
901     })
902 }
903 
904 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
905 pub enum ExitState {
906     Reset,
907     Stop,
908     Crash,
909     GuestPanic,
910 }
911 
912 // Remove ranges in `guest_mem_layout` that overlap with ranges in `file_backed_mappings`.
913 // Returns the updated guest memory layout.
914 fn punch_holes_in_guest_mem_layout_for_mappings(
915     guest_mem_layout: Vec<(GuestAddress, u64)>,
916     file_backed_mappings: &[FileBackedMappingParameters],
917 ) -> Vec<(GuestAddress, u64)> {
918     // Create a set containing (start, end) pairs with exclusive end (end = start + size; the byte
919     // at end is not included in the range).
920     let mut layout_set = BTreeSet::new();
921     for (addr, size) in &guest_mem_layout {
922         layout_set.insert((addr.offset(), addr.offset() + size));
923     }
924 
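    // For each mapping, carve the overlapping portion out of any guest memory range, keeping the
    // non-overlapping head and tail pieces.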
925     for mapping in file_backed_mappings {
926         let mapping_start = mapping.address;
927         let mapping_end = mapping_start + mapping.size;
928 
929         // Repeatedly split overlapping guest memory regions until no overlaps remain.
930         while let Some((range_start, range_end)) = layout_set
931             .iter()
932             .find(|&&(range_start, range_end)| {
933                 mapping_start < range_end && mapping_end > range_start
934             })
935             .cloned()
936         {
937             layout_set.remove(&(range_start, range_end));
938 
939             if range_start < mapping_start {
940                 layout_set.insert((range_start, mapping_start));
941             }
942             if range_end > mapping_end {
943                 layout_set.insert((mapping_end, range_end));
944             }
945         }
946     }
947 
948     // Build the final guest memory layout from the modified layout_set.
949     layout_set
950         .iter()
951         .map(|(start, end)| (GuestAddress(*start), end - start))
952         .collect()
953 }
954 
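// Top-level entry point: builds guest memory and the KVM VM from `cfg`, chooses a split or
// in-kernel irqchip, then hands everything off to run_vm().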
955 pub fn run_config(cfg: Config) -> Result<ExitState> {
956     let components = setup_vm_components(&cfg)?;
957 
958     let guest_mem_layout =
959         Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
960 
961     let guest_mem_layout =
962         punch_holes_in_guest_mem_layout_for_mappings(guest_mem_layout, &cfg.file_backed_mappings);
963 
964     let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
965     let mut mem_policy = MemoryPolicy::empty();
966     if components.hugepages {
967         mem_policy |= MemoryPolicy::USE_HUGEPAGES;
968     }
969     guest_mem.set_memory_policy(mem_policy);
970     let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
971     let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
972 
973     if !cfg.userspace_msr.is_empty() {
974         vm.enable_userspace_msr()
975             .context("failed to enable userspace MSR handling, do you have kernel 5.10 or later")?;
976     }
977 
978     // Check that the VM was actually created in protected mode as expected.
979     if cfg.protected_vm != ProtectionType::Unprotected && !vm.check_capability(VmCap::Protected) {
980         bail!("Failed to create protected VM");
981     }
982     let vm_clone = vm.try_clone().context("failed to clone vm")?;
983 
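    // Local wrapper so the split (userspace IOAPIC) and fully in-kernel KVM irqchip variants can
    // be handed to run_vm() through a single `&mut dyn IrqChipArch`.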
984     enum KvmIrqChip {
985         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
986         Split(KvmSplitIrqChip),
987         Kernel(KvmKernelIrqChip),
988     }
989 
990     impl KvmIrqChip {
991         fn as_mut(&mut self) -> &mut dyn IrqChipArch {
992             match self {
993                 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
994                 KvmIrqChip::Split(i) => i,
995                 KvmIrqChip::Kernel(i) => i,
996             }
997         }
998     }
999 
1000     let ioapic_host_tube;
1001     let mut irq_chip = if cfg.split_irqchip {
1002         #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
1003         unimplemented!("KVM split irqchip mode only supported on x86 processors");
1004         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1005         {
1006             let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
1007             ioapic_host_tube = Some(host_tube);
1008             KvmIrqChip::Split(
1009                 KvmSplitIrqChip::new(
1010                     vm_clone,
1011                     components.vcpu_count,
1012                     ioapic_device_tube,
1013                     Some(120),
1014                 )
1015                 .context("failed to create IRQ chip")?,
1016             )
1017         }
1018     } else {
1019         ioapic_host_tube = None;
1020         KvmIrqChip::Kernel(
1021             KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
1022                 .context("failed to create IRQ chip")?,
1023         )
1024     };
1025 
1026     run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
1027 }
1028 
1029 fn run_vm<Vcpu, V>(
1030     cfg: Config,
1031     #[allow(unused_mut)] mut components: VmComponents,
1032     mut vm: V,
1033     irq_chip: &mut dyn IrqChipArch,
1034     ioapic_host_tube: Option<Tube>,
1035 ) -> Result<ExitState>
1036 where
1037     Vcpu: VcpuArch + 'static,
1038     V: VmArch + 'static,
1039 {
1040     if cfg.jail_config.is_some() {
1041         // Printing something to the syslog before entering minijail so that libc's syslogger has a
1042         // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1043         // access to those files will not be possible.
1044         info!("crosvm entering multiprocess mode");
1045     }
1046 
1047     #[cfg(feature = "usb")]
1048     let (usb_control_tube, usb_provider) =
1049         HostBackendDeviceProvider::new().context("failed to create usb provider")?;
1050 
1051     // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1052     // before any jailed devices have been spawned, so that we can catch any of them that fail very
1053     // quickly.
1054     let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
1055 
1056     let control_server_socket = match &cfg.socket_path {
1057         Some(path) => Some(UnlinkUnixSeqpacketListener(
1058             UnixSeqpacketListener::bind(path).context("failed to create control server")?,
1059         )),
1060         None => None,
1061     };
1062 
1063     let mut control_tubes = Vec::new();
1064 
1065     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1066     if let Some(port) = cfg.gdb {
1067         // GDB needs a control socket to interrupt vcpus.
1068         let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
1069         control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
1070         components.gdb = Some((port, gdb_control_tube));
1071     }
1072 
1073     for wl_cfg in &cfg.vhost_user_wl {
1074         let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
1075             .map(Tube::new)
1076             .context("failed to connect to wayland tube")?;
1077         control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
1078     }
1079 
1080     let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
1081     for _ in 0..cfg.vhost_user_gpu.len() {
1082         let (host_control_tube, device_control_tube) =
1083             Tube::pair().context("failed to create tube")?;
1084         let (host_gpu_tube, device_gpu_tube) = Tube::pair().context("failed to create tube")?;
1085         vhost_user_gpu_tubes.push((host_gpu_tube, device_gpu_tube, device_control_tube));
1086         control_tubes.push(TaggedControlTube::VmMemory(host_control_tube));
1087     }
1088 
1089     let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
1090     control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
1091 
1092     let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
1093         if let Some(ref path) = cfg.balloon_control {
1094             (
1095                 None,
1096                 Some(Tube::new(
1097                     UnixSeqpacket::connect(path).context("failed to create balloon control")?,
1098                 )),
1099             )
1100         } else {
1101             // Balloon gets a special socket so balloon requests can be forwarded
1102             // from the main process.
1103             let (host, device) = Tube::pair().context("failed to create tube")?;
1104             // Set recv timeout to avoid deadlock on sending BalloonControlCommand
1105             // before the guest is ready.
1106             host.set_recv_timeout(Some(Duration::from_millis(100)))
1107                 .context("failed to set timeout")?;
1108             (Some(host), Some(device))
1109         }
1110     } else {
1111         (None, None)
1112     };
1113 
1114     // Create one control socket per disk.
1115     let mut disk_device_tubes = Vec::new();
1116     let mut disk_host_tubes = Vec::new();
1117     let disk_count = cfg.disks.len();
1118     for _ in 0..disk_count {
1119         let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
1120         disk_host_tubes.push(disk_host_tub);
1121         disk_device_tubes.push(disk_device_tube);
1122     }
1123 
1124     let mut pmem_device_tubes = Vec::new();
1125     let pmem_count = cfg.pmem_devices.len();
1126     for _ in 0..pmem_count {
1127         let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
1128         pmem_device_tubes.push(pmem_device_tube);
1129         control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
1130     }
1131 
1132     let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
1133     control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
1134 
1135     if let Some(ioapic_host_tube) = ioapic_host_tube {
1136         control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
1137     }
1138 
1139     let battery = if cfg.battery_type.is_some() {
1140         #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
1141         let jail = match simple_jail(&cfg.jail_config, "battery")? {
1142             #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
1143             Some(mut jail) => {
1144                 // Set up a bind mount to the system D-Bus socket if the powerd monitor is used.
1145                 #[cfg(feature = "power-monitor-powerd")]
1146                 {
1147                     add_current_user_to_jail(&mut jail)?;
1148 
1149                     // Create a tmpfs in the device's root directory so that we can bind mount files.
1150                     jail.mount_with_data(
1151                         Path::new("none"),
1152                         Path::new("/"),
1153                         "tmpfs",
1154                         (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1155                         "size=67108864",
1156                     )?;
1157 
1158                     let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
1159                     jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
1160                 }
1161                 Some(jail)
1162             }
1163             None => None,
1164         };
1165         (&cfg.battery_type, jail)
1166     } else {
1167         (&cfg.battery_type, None)
1168     };
1169 
1170     let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
1171 
1172     let fs_count = cfg
1173         .shared_dirs
1174         .iter()
1175         .filter(|sd| sd.kind == SharedDirKind::FS)
1176         .count();
1177     let mut fs_device_tubes = Vec::with_capacity(fs_count);
1178     for _ in 0..fs_count {
1179         let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
1180         control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
1181         fs_device_tubes.push(fs_device_tube);
1182     }
1183 
1184     let mut vvu_proxy_device_tubes = Vec::new();
1185     for _ in 0..cfg.vvu_proxy.len() {
1186         let (vvu_proxy_host_tube, vvu_proxy_device_tube) =
1187             Tube::pair().context("failed to create VVU proxy tube")?;
1188         control_tubes.push(TaggedControlTube::VmMemory(vvu_proxy_host_tube));
1189         vvu_proxy_device_tubes.push(vvu_proxy_device_tube);
1190     }
1191 
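    // Events and the panic tube used by the main control loop to learn about guest exit, reset,
    // and crash requests; the write end of the panic tube goes to the pvpanic device.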
1192     let exit_evt = Event::new().context("failed to create event")?;
1193     let reset_evt = Event::new().context("failed to create event")?;
1194     let crash_evt = Event::new().context("failed to create event")?;
1195     let (panic_rdtube, panic_wrtube) = Tube::pair().context("failed to create tube")?;
1196 
1197     let pstore_size = components.pstore.as_ref().map(|pstore| pstore.size as u64);
1198     let mut sys_allocator = SystemAllocator::new(
1199         Arch::get_system_allocator_config(&vm),
1200         pstore_size,
1201         &cfg.mmio_address_ranges,
1202     )
1203     .context("failed to create system allocator")?;
1204 
1205     let ramoops_region = match &components.pstore {
1206         Some(pstore) => Some(
1207             arch::pstore::create_memory_region(
1208                 &mut vm,
1209                 sys_allocator.reserved_region().unwrap(),
1210                 pstore,
1211             )
1212             .context("failed to allocate pstore region")?,
1213         ),
1214         None => None,
1215     };
1216 
1217     create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
1218 
1219     #[cfg(feature = "gpu")]
1220     // Hold on to the render server jail so it keeps running until we exit run_vm()
1221     let (_render_server_jail, render_server_fd) =
1222         if let Some(parameters) = &cfg.gpu_render_server_parameters {
1223             let (jail, fd) = start_gpu_render_server(&cfg, parameters)?;
1224             (Some(ScopedMinijail(jail)), Some(fd))
1225         } else {
1226             (None, None)
1227         };
1228 
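    // Start with the balloon inflated by the difference between the configured memory size and
    // cfg.init_memory (when set), so the guest initially sees only init_memory of usable RAM.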
1229     let init_balloon_size = components
1230         .memory_size
1231         .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
1232             m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
1233         }))
1234         .context("failed to calculate init balloon size")?;
1235 
1236     #[cfg(feature = "direct")]
1237     let mut irqs = Vec::new();
1238 
1239     #[cfg(feature = "direct")]
1240     for irq in &cfg.direct_level_irq {
1241         if !sys_allocator.reserve_irq(*irq) {
1242             warn!("irq {} already reserved.", irq);
1243         }
1244         let irq_evt = devices::IrqLevelEvent::new().context("failed to create event")?;
1245         irq_chip.register_level_irq_event(*irq, &irq_evt).unwrap();
1246         let direct_irq = devices::DirectIrq::new_level(&irq_evt)
1247             .context("failed to enable interrupt forwarding")?;
1248         direct_irq
1249             .irq_enable(*irq)
1250             .context("failed to enable interrupt forwarding")?;
1251 
1252         if cfg.direct_wake_irq.contains(&irq) {
1253             direct_irq
1254                 .irq_wake_enable(*irq)
1255                 .context("failed to enable interrupt wake")?;
1256         }
1257 
1258         irqs.push(direct_irq);
1259     }
1260 
1261     #[cfg(feature = "direct")]
1262     for irq in &cfg.direct_edge_irq {
1263         if !sys_allocator.reserve_irq(*irq) {
1264             warn!("irq {} already reserved.", irq);
1265         }
1266         let irq_evt = devices::IrqEdgeEvent::new().context("failed to create event")?;
1267         irq_chip.register_edge_irq_event(*irq, &irq_evt).unwrap();
1268         let direct_irq = devices::DirectIrq::new_edge(&irq_evt)
1269             .context("failed to enable interrupt forwarding")?;
1270         direct_irq
1271             .irq_enable(*irq)
1272             .context("failed to enable interrupt forwarding")?;
1273 
1274         if cfg.direct_wake_irq.contains(&irq) {
1275             direct_irq
1276                 .irq_wake_enable(*irq)
1277                 .context("failed to enable interrupt wake")?;
1278         }
1279 
1280         irqs.push(direct_irq);
1281     }
1282 
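    // Endpoints collected here (VFIO devices placed behind the virtio-iommu) are handed to the
    // virtio-iommu device created further below.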
1283     let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> =
1284         BTreeMap::new();
1285     let mut devices = create_devices(
1286         &cfg,
1287         &mut vm,
1288         &mut sys_allocator,
1289         &exit_evt,
1290         panic_wrtube,
1291         &mut iommu_attached_endpoints,
1292         &mut control_tubes,
1293         wayland_device_tube,
1294         gpu_device_tube,
1295         vhost_user_gpu_tubes,
1296         balloon_device_tube,
1297         init_balloon_size,
1298         &mut disk_device_tubes,
1299         &mut pmem_device_tubes,
1300         &mut fs_device_tubes,
1301         #[cfg(feature = "usb")]
1302         usb_provider,
1303         Arc::clone(&map_request),
1304         #[cfg(feature = "gpu")]
1305         render_server_fd,
1306         &mut vvu_proxy_device_tubes,
1307         components.memory_size,
1308     )?;
1309 
1310     let mut hp_endpoints_ranges: Vec<RangeInclusive<u32>> = Vec::new();
1311     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1312     let mut hotplug_buses: Vec<Arc<Mutex<dyn HotPlugBus>>> = Vec::new();
1313     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1314     let mut gpe_notify_devs: Vec<(u32, Arc<Mutex<dyn GpeNotify>>)> = Vec::new();
1315     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1316     {
1317         #[cfg(feature = "direct")]
1318         let rp_host = cfg.pcie_rp.clone();
1319         #[cfg(not(feature = "direct"))]
1320         let rp_host: Vec<HostPcieRootPortParameters> = Vec::new();
1321 
1322         // Create Pcie Root Port
1323         create_pcie_root_port(
1324             rp_host,
1325             &mut sys_allocator,
1326             &mut control_tubes,
1327             &mut devices,
1328             &mut hotplug_buses,
1329             &mut hp_endpoints_ranges,
1330             &mut gpe_notify_devs,
1331         )?;
1332     }
1333 
1334     let (translate_response_senders, request_rx) = setup_virtio_access_platform(
1335         &mut sys_allocator,
1336         &mut iommu_attached_endpoints,
1337         &mut devices,
1338     )?;
1339 
1340     let iommu_host_tube = if !iommu_attached_endpoints.is_empty() || cfg.virtio_iommu {
1341         let (iommu_host_tube, iommu_device_tube) = Tube::pair().context("failed to create tube")?;
1342         let iommu_dev = create_iommu_device(
1343             &cfg,
1344             (1u64 << vm.get_guest_phys_addr_bits()) - 1,
1345             iommu_attached_endpoints,
1346             hp_endpoints_ranges,
1347             translate_response_senders,
1348             request_rx,
1349             iommu_device_tube,
1350         )?;
1351 
1352         let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1353         control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
1354         let mut dev = VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
1355             .context("failed to create virtio pci dev")?;
1356         // early reservation for viommu.
1357         dev.allocate_address(&mut sys_allocator)
1358             .context("failed to allocate resources early for virtio pci dev")?;
1359         let dev = Box::new(dev);
1360         devices.push((dev, iommu_dev.jail));
1361         Some(iommu_host_tube)
1362     } else {
1363         None
1364     };
1365 
1366     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1367     for device in devices
1368         .iter_mut()
1369         .filter_map(|(dev, _)| dev.as_pci_device_mut())
1370     {
1371         let sdts = device
1372             .generate_acpi(components.acpi_sdts)
1373             .or_else(|| {
1374                 error!("ACPI table generation error");
1375                 None
1376             })
1377             .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
1378         components.acpi_sdts = sdts;
1379     }
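    // The Option chain above can read oddly: `generate_acpi` yields an Option, the
    // `or_else` closure only logs and still returns None, and it is `ok_or_else` that
    // turns the None into an error. A minimal, self-contained sketch of the same
    // pattern (hypothetical `maybe_value`, not part of this crate):
    //
    //     fn maybe_value() -> Option<u32> { None }
    //
    //     let v = maybe_value()
    //         .or_else(|| {
    //             error!("no value produced"); // side effect only; still None
    //             None
    //         })
    //         .ok_or_else(|| anyhow!("failed to produce a value"))?;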
1380 
1381     // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
1382     let mut kvm_vcpu_ids = Vec::new();
1383 
1384     #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
1385     let mut linux = Arch::build_vm::<V, Vcpu>(
1386         components,
1387         &exit_evt,
1388         &reset_evt,
1389         &mut sys_allocator,
1390         &cfg.serial_parameters,
1391         simple_jail(&cfg.jail_config, "serial")?,
1392         battery,
1393         vm,
1394         ramoops_region,
1395         devices,
1396         irq_chip,
1397         &mut kvm_vcpu_ids,
1398     )
1399     .context("the architecture failed to build the vm")?;
1400 
1401     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1402     {
1403         for hotplug_bus in hotplug_buses.iter() {
1404             linux.hotplug_bus.push(hotplug_bus.clone());
1405         }
1406 
1407         if let Some(pm) = &linux.pm {
1408             while let Some((gpe, notify_dev)) = gpe_notify_devs.pop() {
1409                 pm.lock().register_gpe_notify_dev(gpe, notify_dev);
1410             }
1411         }
1412     }
1413 
1414     #[cfg(feature = "direct")]
1415     if let Some(pmio) = &cfg.direct_pmio {
1416         let direct_io = Arc::new(
1417             devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
1418         );
1419         for range in pmio.ranges.iter() {
1420             linux
1421                 .io_bus
1422                 .insert_sync(direct_io.clone(), range.base, range.len)
1423                 .unwrap();
1424         }
1425     };
1426 
1427     #[cfg(feature = "direct")]
1428     if let Some(mmio) = &cfg.direct_mmio {
1429         let direct_mmio = Arc::new(
1430             devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
1431                 .context("failed to open direct mmio device")?,
1432         );
1433 
1434         for range in mmio.ranges.iter() {
1435             linux
1436                 .mmio_bus
1437                 .insert_sync(direct_mmio.clone(), range.base, range.len)
1438                 .unwrap();
1439         }
1440     };
1441 
1442     let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
1443     run_control(
1444         linux,
1445         sys_allocator,
1446         cfg,
1447         control_server_socket,
1448         control_tubes,
1449         balloon_host_tube,
1450         &disk_host_tubes,
1451         #[cfg(feature = "usb")]
1452         usb_control_tube,
1453         exit_evt,
1454         reset_evt,
1455         crash_evt,
1456         panic_rdtube,
1457         sigchld_fd,
1458         Arc::clone(&map_request),
1459         gralloc,
1460         kvm_vcpu_ids,
1461         iommu_host_tube,
1462     )
1463 }
1464 
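/// Finds the hotplug bus that matches the PCI device at `host_addr` and returns it
/// together with the bus number reported by that bus.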
1465 fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
1466     linux: &RunnableLinuxVm<V, Vcpu>,
1467     host_addr: PciAddress,
1468 ) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
1469     for hp_bus in linux.hotplug_bus.iter() {
1470         if let Some(number) = hp_bus.lock().is_match(host_addr) {
1471             return Ok((hp_bus.clone(), number));
1472         }
1473     }
1474     Err(anyhow!("Failed to find a suitable hotplug bus"))
1475 }
1476 
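/// Hotplugs the VFIO device at `vfio_path` into a running guest: creates the VFIO PCI
/// device, registers it with the architecture, attaches it to the virtio-iommu when one
/// is present, and notifies the matching hotplug bus.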
1477 fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1478     linux: &mut RunnableLinuxVm<V, Vcpu>,
1479     sys_allocator: &mut SystemAllocator,
1480     cfg: &Config,
1481     control_tubes: &mut Vec<TaggedControlTube>,
1482     iommu_host_tube: &Option<Tube>,
1483     vfio_path: &Path,
1484 ) -> Result<()> {
1485     let host_os_str = vfio_path
1486         .file_name()
1487         .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1488     let host_str = host_os_str
1489         .to_str()
1490         .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1491     let host_addr = PciAddress::from_str(host_str).context("failed to parse vfio pci address")?;
1492 
1493     let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
1494 
1495     let mut endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> = BTreeMap::new();
1496     let (vfio_pci_device, jail) = create_vfio_device(
1497         cfg,
1498         &linux.vm,
1499         sys_allocator,
1500         control_tubes,
1501         vfio_path,
1502         Some(bus_num),
1503         None,
1504         &mut endpoints,
1505         None,
1506         if iommu_host_tube.is_some() {
1507             IommuDevType::VirtioIommu
1508         } else {
1509             IommuDevType::NoIommu
1510         },
1511     )?;
1512 
1513     let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
1514         .context("Failed to configure pci hotplug device")?;
1515 
1516     if let Some(iommu_host_tube) = iommu_host_tube {
1517         let &endpoint_addr = endpoints.iter().next().unwrap().0;
1518         let mapper = endpoints.remove(&endpoint_addr).unwrap();
1519         if let Some(vfio_wrapper) = mapper.lock().as_vfio_wrapper() {
1520             let vfio_container = vfio_wrapper.as_vfio_container();
1521             let descriptor = vfio_container.lock().into_raw_descriptor()?;
1522             let request = VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceAdd {
1523                 endpoint_addr,
1524                 container: {
1525                     // Safe because the raw descriptor from `into_raw_descriptor()` is solely owned here, and the `File` takes over that ownership.
1526                     unsafe { File::from_raw_descriptor(descriptor) }
1527                 },
1528             });
1529 
1530             match virtio_iommu_request(iommu_host_tube, &request)
1531                 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
1532             {
1533                 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
1534                 resp => bail!("Unexpected message response: {:?}", resp),
1535             }
1536         };
1537     }
1538 
1539     let host_key = HostHotPlugKey::Vfio { host_addr };
1540     let mut hp_bus = hp_bus.lock();
1541     hp_bus.add_hotplug_device(host_key, pci_address);
1542     hp_bus.hot_plug(pci_address);
1543     Ok(())
1544 }
1545 
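/// Hot-unplugs the previously added VFIO device identified by `vfio_path`, detaching it
/// from the virtio-iommu when one is present and releasing its PCI resources.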
1546 fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1547     linux: &RunnableLinuxVm<V, Vcpu>,
1548     sys_allocator: &mut SystemAllocator,
1549     iommu_host_tube: &Option<Tube>,
1550     vfio_path: &Path,
1551 ) -> Result<()> {
1552     let host_os_str = vfio_path
1553         .file_name()
1554         .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1555     let host_str = host_os_str
1556         .to_str()
1557         .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1558     let host_addr = PciAddress::from_str(host_str).context("failed to parse vfio pci address")?;
1559     let host_key = HostHotPlugKey::Vfio { host_addr };
1560     for hp_bus in linux.hotplug_bus.iter() {
1561         let mut hp_bus_lock = hp_bus.lock();
1562         if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
1563             if let Some(iommu_host_tube) = iommu_host_tube {
1564                 let request =
1565                     VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceDel {
1566                         endpoint_addr: pci_addr.to_u32(),
1567                     });
1568                 match virtio_iommu_request(iommu_host_tube, &request)
1569                     .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
1570                 {
1571                     VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
1572                     resp => bail!("Unexpected message response: {:?}", resp),
1573                 }
1574             }
1575 
1576             hp_bus_lock.hot_unplug(pci_addr);
1577             sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
1578             return Ok(());
1579         }
1580     }
1581 
1582     Err(anyhow!("no hotplug bus found for the device to remove"))
1583 }
1584 
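/// Handles a `VmRequest::VfioCommand` from a control tube by either hotplugging
/// (`add == true`) or hot-unplugging the VFIO device at `vfio_path`.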
1585 fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
1586     linux: &mut RunnableLinuxVm<V, Vcpu>,
1587     sys_allocator: &mut SystemAllocator,
1588     cfg: &Config,
1589     add_tubes: &mut Vec<TaggedControlTube>,
1590     iommu_host_tube: &Option<Tube>,
1591     vfio_path: &Path,
1592     add: bool,
1593 ) -> VmResponse {
1594     let ret = if add {
1595         add_vfio_device(
1596             linux,
1597             sys_allocator,
1598             cfg,
1599             add_tubes,
1600             iommu_host_tube,
1601             vfio_path,
1602         )
1603     } else {
1604         remove_vfio_device(linux, sys_allocator, iommu_host_tube, vfio_path)
1605     };
1606 
1607     match ret {
1608         Ok(()) => VmResponse::Ok,
1609         Err(e) => {
1610             error!("handle_vfio_command failure: {}", e);
1611             add_tubes.clear();
1612             VmResponse::Err(base::Error::new(libc::EINVAL))
1613         }
1614     }
1615 }
1616 
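/// The main VM control loop: spawns the vCPU threads, then services exit, reset, crash,
/// panic, suspend, child-signal, irq, and control-socket events until the VM shuts down.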
1617 fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
1618     mut linux: RunnableLinuxVm<V, Vcpu>,
1619     mut sys_allocator: SystemAllocator,
1620     cfg: Config,
1621     control_server_socket: Option<UnlinkUnixSeqpacketListener>,
1622     mut control_tubes: Vec<TaggedControlTube>,
1623     balloon_host_tube: Option<Tube>,
1624     disk_host_tubes: &[Tube],
1625     #[cfg(feature = "usb")] usb_control_tube: Tube,
1626     exit_evt: Event,
1627     reset_evt: Event,
1628     crash_evt: Event,
1629     panic_rdtube: Tube,
1630     sigchld_fd: SignalFd,
1631     map_request: Arc<Mutex<Option<ExternalMapping>>>,
1632     mut gralloc: RutabagaGralloc,
1633     kvm_vcpu_ids: Vec<usize>,
1634     iommu_host_tube: Option<Tube>,
1635 ) -> Result<ExitState> {
1636     #[derive(PollToken)]
1637     enum Token {
1638         Exit,
1639         Reset,
1640         Crash,
1641         Panic,
1642         Suspend,
1643         ChildSignal,
1644         IrqFd { index: IrqEventIndex },
1645         VmControlServer,
1646         VmControl { index: usize },
1647         DelayedIrqFd,
1648     }
1649 
1650     stdin()
1651         .set_raw_mode()
1652         .expect("failed to set terminal raw mode");
1653 
1654     let wait_ctx = WaitContext::build_with(&[
1655         (&exit_evt, Token::Exit),
1656         (&reset_evt, Token::Reset),
1657         (&crash_evt, Token::Crash),
1658         (&panic_rdtube, Token::Panic),
1659         (&linux.suspend_evt, Token::Suspend),
1660         (&sigchld_fd, Token::ChildSignal),
1661     ])
1662     .context("failed to add descriptor to wait context")?;
1663 
1664     if let Some(socket_server) = &control_server_socket {
1665         wait_ctx
1666             .add(socket_server, Token::VmControlServer)
1667             .context("failed to add descriptor to wait context")?;
1668     }
1669     for (index, socket) in control_tubes.iter().enumerate() {
1670         wait_ctx
1671             .add(socket.as_ref(), Token::VmControl { index })
1672             .context("failed to add descriptor to wait context")?;
1673     }
1674 
1675     let events = linux
1676         .irq_chip
1677         .irq_event_tokens()
1678         .context("failed to get irq event tokens")?;
1679 
1680     for (index, _gsi, evt) in events {
1681         wait_ctx
1682             .add(&evt, Token::IrqFd { index })
1683             .context("failed to add descriptor to wait context")?;
1684     }
1685 
1686     if let Some(delayed_ioapic_irq_trigger) = linux.irq_chip.irq_delayed_event_token()? {
1687         wait_ctx
1688             .add(&delayed_ioapic_irq_trigger, Token::DelayedIrqFd)
1689             .context("failed to add descriptor to wait context")?;
1690     }
1691 
1692     if cfg.jail_config.is_some() {
1693         // Before starting VCPUs, in case we started with some capabilities, drop them all.
1694         drop_capabilities().context("failed to drop process capabilities")?;
1695     }
1696 
1697     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1698     // Create a channel for the GDB thread.
1699     let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
1700         let (s, r) = mpsc::channel();
1701         (Some(s), Some(r))
1702     } else {
1703         (None, None)
1704     };
1705 
1706     let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
1707     let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
1708     let use_hypervisor_signals = !linux
1709         .vm
1710         .get_hypervisor()
1711         .check_capability(HypervisorCap::ImmediateExit);
1712     vcpu::setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
1713 
1714     let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
1715         Some(vec) => vec.into_iter().map(Some).collect(),
1716         None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
1717     };
1718     // Enable core scheduling before creating vCPUs so that the cookie will be
1719     // shared by all vCPU threads.
1720     // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
1721     // itself for even better performance. Only vCPUs need the feature.
1722     if cfg.per_vm_core_scheduling {
1723         if let Err(e) = enable_core_scheduling() {
1724             error!("Failed to enable core scheduling: {}", e);
1725         }
1726     }
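    // For context, a rough sketch of what requesting a core-scheduling cookie amounts to
    // at the syscall level. The real logic lives in enable_core_scheduling(); the raw
    // prctl constants and arguments below are assumptions for illustration, not taken
    // from that helper:
    //
    //     // PR_SCHED_CORE (62) + PR_SCHED_CORE_CREATE (1): create a new cookie for the
    //     // whole thread group (pid 0 = current, scope PIDTYPE_TGID = 1), so the vCPU
    //     // threads spawned below inherit it and only share physical cores with each
    //     // other.
    //     let ret = unsafe { libc::prctl(62, 1u64, 0u64, 1u64, 0u64) };
    //     if ret != 0 {
    //         // e.g. an older kernel without core scheduling; continue without it.
    //     }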
1727     let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
1728         None => None,
1729         Some(cgroup_path) => {
1730             // Move main process to cgroup_path
1731             let mut f = File::create(&cgroup_path.join("tasks"))?;
1732             f.write_all(process::id().to_string().as_bytes())?;
1733             Some(f)
1734         }
1735     };
1736 
1737     #[cfg(target_os = "android")]
1738     android::set_process_profiles(&cfg.task_profiles)?;
1739 
1740     for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
1741         let (to_vcpu_channel, from_main_channel) = mpsc::channel();
1742         let vcpu_affinity = match linux.vcpu_affinity.clone() {
1743             Some(VcpuAffinity::Global(v)) => v,
1744             Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
1745             None => Default::default(),
1746         };
1747         let handle = vcpu::run_vcpu(
1748             cpu_id,
1749             kvm_vcpu_ids[cpu_id],
1750             vcpu,
1751             linux.vm.try_clone().context("failed to clone vm")?,
1752             linux
1753                 .irq_chip
1754                 .try_box_clone()
1755                 .context("failed to clone irqchip")?,
1756             linux.vcpu_count,
1757             linux.rt_cpus.contains(&cpu_id),
1758             vcpu_affinity,
1759             linux.delay_rt,
1760             linux.no_smt,
1761             vcpu_thread_barrier.clone(),
1762             linux.has_bios,
1763             (*linux.io_bus).clone(),
1764             (*linux.mmio_bus).clone(),
1765             exit_evt.try_clone().context("failed to clone event")?,
1766             reset_evt.try_clone().context("failed to clone event")?,
1767             crash_evt.try_clone().context("failed to clone event")?,
1768             linux.vm.check_capability(VmCap::PvClockSuspend),
1769             from_main_channel,
1770             use_hypervisor_signals,
1771             #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1772             to_gdb_channel.clone(),
1773             cfg.per_vm_core_scheduling,
1774             cfg.host_cpu_topology,
1775             cfg.privileged_vm,
1776             match vcpu_cgroup_tasks_file {
1777                 None => None,
1778                 Some(ref f) => Some(
1779                     f.try_clone()
1780                         .context("failed to clone vcpu cgroup tasks file")?,
1781                 ),
1782             },
1783             cfg.userspace_msr.clone(),
1784         )?;
1785         vcpu_handles.push((handle, to_vcpu_channel));
1786     }
1787 
1788     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1789     // Spawn the GDB thread.
1790     if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
1791         let to_vcpu_channels = vcpu_handles
1792             .iter()
1793             .map(|(_handle, channel)| channel.clone())
1794             .collect();
1795         let target = GdbStub::new(
1796             gdb_control_tube,
1797             to_vcpu_channels,
1798             from_vcpu_channel.unwrap(), // always Some when linux.gdb is Some
1799         );
1800         thread::Builder::new()
1801             .name("gdb".to_owned())
1802             .spawn(move || gdb_thread(target, gdb_port_num))
1803             .context("failed to spawn GDB thread")?;
1804     };
1805 
1806     vcpu_thread_barrier.wait();
1807 
1808     let mut exit_state = ExitState::Stop;
1809     let mut balloon_stats_id: u64 = 0;
1810 
1811     'wait: loop {
1812         let events = {
1813             match wait_ctx.wait() {
1814                 Ok(v) => v,
1815                 Err(e) => {
1816                     error!("failed to poll: {}", e);
1817                     break;
1818                 }
1819             }
1820         };
1821 
1822         let mut vm_control_indices_to_remove = Vec::new();
1823         for event in events.iter().filter(|e| e.is_readable) {
1824             match event.token {
1825                 Token::Exit => {
1826                     info!("vcpu requested shutdown");
1827                     break 'wait;
1828                 }
1829                 Token::Reset => {
1830                     info!("vcpu requested reset");
1831                     exit_state = ExitState::Reset;
1832                     break 'wait;
1833                 }
1834                 Token::Crash => {
1835                     info!("vcpu crashed");
1836                     exit_state = ExitState::Crash;
1837                     break 'wait;
1838                 }
1839                 Token::Panic => {
1840                     let mut break_to_wait: bool = true;
1841                     match panic_rdtube.recv::<u8>() {
1842                         Ok(panic_code) => {
1843                             let panic_code = PvPanicCode::from_u8(panic_code);
1844                             info!("Guest reported panic [Code: {}]", panic_code);
1845                             if panic_code == PvPanicCode::CrashLoaded {
1846                                 // VM is booting to crash kernel.
1847                                 break_to_wait = false;
1848                             }
1849                         }
1850                         Err(e) => {
1851                             warn!("failed to recv panic event: {} ", e);
1852                         }
1853                     }
1854                     if break_to_wait {
1855                         exit_state = ExitState::GuestPanic;
1856                         break 'wait;
1857                     }
1858                 }
1859                 Token::Suspend => {
1860                     info!("VM requested suspend");
1861                     linux.suspend_evt.read().unwrap();
1862                     vcpu::kick_all_vcpus(
1863                         &vcpu_handles,
1864                         linux.irq_chip.as_irq_chip(),
1865                         VcpuControl::RunState(VmRunMode::Suspending),
1866                     );
1867                 }
1868                 Token::ChildSignal => {
1869                     // Print all available siginfo structs, then exit the loop.
1870                     while let Some(siginfo) =
1871                         sigchld_fd.read().context("failed to read signalfd")?
1872                     {
1873                         let pid = siginfo.ssi_pid;
1874                         let pid_label = match linux.pid_debug_label_map.get(&pid) {
1875                             Some(label) => format!("{} (pid {})", label, pid),
1876                             None => format!("pid {}", pid),
1877                         };
1878                         error!(
1879                             "child {} died: signo {}, status {}, code {}",
1880                             pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
1881                         );
1882                     }
1883                     break 'wait;
1884                 }
1885                 Token::IrqFd { index } => {
1886                     if let Err(e) = linux.irq_chip.service_irq_event(index) {
1887                         error!("failed to signal irq {}: {}", index, e);
1888                     }
1889                 }
1890                 Token::DelayedIrqFd => {
1891                     if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
1892                         warn!("can't deliver delayed irqs: {}", e);
1893                     }
1894                 }
1895                 Token::VmControlServer => {
1896                     if let Some(socket_server) = &control_server_socket {
1897                         match socket_server.accept() {
1898                             Ok(socket) => {
1899                                 wait_ctx
1900                                     .add(
1901                                         &socket,
1902                                         Token::VmControl {
1903                                             index: control_tubes.len(),
1904                                         },
1905                                     )
1906                                     .context("failed to add descriptor to wait context")?;
1907                                 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
1908                             }
1909                             Err(e) => error!("failed to accept socket: {}", e),
1910                         }
1911                     }
1912                 }
1913                 Token::VmControl { index } => {
1914                     let mut add_tubes = Vec::new();
1915                     if let Some(socket) = control_tubes.get(index) {
1916                         match socket {
1917                             TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
1918                                 Ok(request) => {
1919                                     let mut run_mode_opt = None;
1920                                     let response = match request {
1921                                         VmRequest::VfioCommand { vfio_path, add } => {
1922                                             handle_vfio_command(
1923                                                 &mut linux,
1924                                                 &mut sys_allocator,
1925                                                 &cfg,
1926                                                 &mut add_tubes,
1927                                                 &iommu_host_tube,
1928                                                 &vfio_path,
1929                                                 add,
1930                                             )
1931                                         }
1932                                         _ => request.execute(
1933                                             &mut run_mode_opt,
1934                                             balloon_host_tube.as_ref(),
1935                                             &mut balloon_stats_id,
1936                                             disk_host_tubes,
1937                                             &mut linux.pm,
1938                                             #[cfg(feature = "usb")]
1939                                             Some(&usb_control_tube),
1940                                             #[cfg(not(feature = "usb"))]
1941                                             None,
1942                                             &mut linux.bat_control,
1943                                             &vcpu_handles,
1944                                         ),
1945                                     };
1946 
1947                                     if let Err(e) = tube.send(&response) {
1948                                         error!("failed to send VmResponse: {}", e);
1949                                     }
1950                                     if let Some(run_mode) = run_mode_opt {
1951                                         info!("control socket changed run mode to {}", run_mode);
1952                                         match run_mode {
1953                                             VmRunMode::Exiting => {
1954                                                 break 'wait;
1955                                             }
1956                                             other => {
1957                                                 if other == VmRunMode::Running {
1958                                                     for dev in &linux.resume_notify_devices {
1959                                                         dev.lock().resume_imminent();
1960                                                     }
1961                                                 }
1962                                                 vcpu::kick_all_vcpus(
1963                                                     &vcpu_handles,
1964                                                     linux.irq_chip.as_irq_chip(),
1965                                                     VcpuControl::RunState(other),
1966                                                 );
1967                                             }
1968                                         }
1969                                     }
1970                                 }
1971                                 Err(e) => {
1972                                     if let TubeError::Disconnected = e {
1973                                         vm_control_indices_to_remove.push(index);
1974                                     } else {
1975                                         error!("failed to recv VmRequest: {}", e);
1976                                     }
1977                                 }
1978                             },
1979                             TaggedControlTube::VmMemory(tube) => {
1980                                 match tube.recv::<VmMemoryRequest>() {
1981                                     Ok(request) => {
1982                                         let response = request.execute(
1983                                             &mut linux.vm,
1984                                             &mut sys_allocator,
1985                                             Arc::clone(&map_request),
1986                                             &mut gralloc,
1987                                         );
1988                                         if let Err(e) = tube.send(&response) {
1989                                             error!("failed to send VmMemoryControlResponse: {}", e);
1990                                         }
1991                                     }
1992                                     Err(e) => {
1993                                         if let TubeError::Disconnected = e {
1994                                             vm_control_indices_to_remove.push(index);
1995                                         } else {
1996                                             error!("failed to recv VmMemoryControlRequest: {}", e);
1997                                         }
1998                                     }
1999                                 }
2000                             }
2001                             TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
2002                                 Ok(request) => {
2003                                     let response = {
2004                                         let irq_chip = &mut linux.irq_chip;
2005                                         request.execute(
2006                                             |setup| match setup {
2007                                                 IrqSetup::Event(irq, ev, _, _, _) => {
2008                                                     let irq_evt = devices::IrqEdgeEvent::from_event(ev.try_clone()?);
2009                                                     if let Some(event_index) = irq_chip
2010                                                         .register_edge_irq_event(irq, &irq_evt)?
2011                                                     {
2012                                                         match wait_ctx.add(
2013                                                             ev,
2014                                                             Token::IrqFd {
2015                                                                 index: event_index
2016                                                             },
2017                                                         ) {
2018                                                             Err(e) => {
2019                                                                 warn!("failed to add IrqFd to poll context: {}", e);
2020                                                                 Err(e)
2021                                                             },
2022                                                             Ok(_) => {
2023                                                                 Ok(())
2024                                                             }
2025                                                         }
2026                                                     } else {
2027                                                         Ok(())
2028                                                     }
2029                                                 }
2030                                                 IrqSetup::Route(route) => irq_chip.route_irq(route),
2031                                                 IrqSetup::UnRegister(irq, ev) => {
2032                                                     let irq_evt = devices::IrqEdgeEvent::from_event(ev.try_clone()?);
2033                                                     irq_chip.unregister_edge_irq_event(irq, &irq_evt)
2034                                                 }
2035                                             },
2036                                             &mut sys_allocator,
2037                                         )
2038                                     };
2039                                     if let Err(e) = tube.send(&response) {
2040                                         error!("failed to send VmIrqResponse: {}", e);
2041                                     }
2042                                 }
2043                                 Err(e) => {
2044                                     if let TubeError::Disconnected = e {
2045                                         vm_control_indices_to_remove.push(index);
2046                                     } else {
2047                                         error!("failed to recv VmIrqRequest: {}", e);
2048                                     }
2049                                 }
2050                             },
2051                             TaggedControlTube::VmMsync(tube) => {
2052                                 match tube.recv::<VmMsyncRequest>() {
2053                                     Ok(request) => {
2054                                         let response = request.execute(&mut linux.vm);
2055                                         if let Err(e) = tube.send(&response) {
2056                                             error!("failed to send VmMsyncResponse: {}", e);
2057                                         }
2058                                     }
2059                                     Err(e) => {
2060                                         if let TubeError::Disconnected = e {
2061                                             vm_control_indices_to_remove.push(index);
2062                                         } else {
2063                                             error!("failed to recv VmMsyncRequest: {}", e);
2064                                         }
2065                                     }
2066                                 }
2067                             }
2068                             TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
2069                                 Ok(request) => {
2070                                     let response =
2071                                         request.execute(&mut linux.vm, &mut sys_allocator);
2072                                     if let Err(e) = tube.send(&response) {
2073                                         error!("failed to send VmResponse: {}", e);
2074                                     }
2075                                 }
2076                                 Err(e) => {
2077                                     if let TubeError::Disconnected = e {
2078                                         vm_control_indices_to_remove.push(index);
2079                                     } else {
2080                                         error!("failed to recv VmResponse: {}", e);
2081                                     }
2082                                 }
2083                             },
2084                         }
2085                     }
2086                     if !add_tubes.is_empty() {
2087                         for (idx, socket) in add_tubes.iter().enumerate() {
2088                             wait_ctx
2089                                 .add(
2090                                     socket.as_ref(),
2091                                     Token::VmControl {
2092                                         index: idx + control_tubes.len(),
2093                                     },
2094                                 )
2095                                 .context(
2096                                     "failed to add hotplug vfio-pci descriptor to wait context",
2097                                 )?;
2098                         }
2099                         control_tubes.append(&mut add_tubes);
2100                     }
2101                 }
2102             }
2103         }
2104 
2105         // It's possible more data is readable and buffered while the socket is hungup,
2106         // so don't delete the tube from the poll context until we're sure all the
2107         // data is read.
2108         // The loop below handles the case where we have received a hungup event but the
2109         // tube is not readable.
2110         // If the tube is readable, once all buffered data has been read, any further read
2111         // on the hungup tube fails with a Disconnected error, so its index has already been
2112         // added to vm_control_indices_to_remove by the time we reach here.
2113         for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
2114             if let Token::VmControl { index } = event.token {
2115                 vm_control_indices_to_remove.push(index);
2116             }
2117         }
2118 
2119         // Sort in reverse so the highest indexes are removed first. This removal algorithm
2120         // preserves correct indexes as each element is removed.
2121         vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
2122         vm_control_indices_to_remove.dedup();
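        // A minimal, self-contained illustration of why the descending order matters
        // (hypothetical values, not taken from this loop):
        //
        //     let mut tubes = vec!["a", "b", "c", "d"];
        //     let mut to_remove = vec![1usize, 3, 1];
        //     to_remove.sort_unstable_by_key(|&k| Reverse(k));
        //     to_remove.dedup();                  // [3, 1]
        //     for i in to_remove {
        //         tubes.swap_remove(i);           // removes "d", then "b"
        //     }
        //     assert_eq!(tubes, vec!["a", "c"]);
        //
        // Removing in ascending order would invalidate the larger indexes after the
        // first removal.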
2123         for index in vm_control_indices_to_remove {
2124             // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
2125             // this automatically when the FD inserted into the `wait_ctx` is closed after this
2126             // if-block, but this removal can be deferred unpredictably. In some instances where the
2127             // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
2128             // that has already been closed. Because the token associated with that spurious event
2129             // now belongs to a different socket, the control loop will start to interact with
2130             // sockets that might not be ready to use. This can cause incorrect hangup detection or
2131             // blocking on a socket that will never be ready. See also: crbug.com/1019986
2132             if let Some(socket) = control_tubes.get(index) {
2133                 wait_ctx
2134                     .delete(socket)
2135                     .context("failed to remove descriptor from wait context")?;
2136             }
2137 
2138             // This line implicitly drops the socket at `index` when it gets returned by
2139             // `swap_remove`. After this line, the socket at `index` is not the one from
2140             // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
2141             // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
2142             control_tubes.swap_remove(index);
2143             if let Some(tube) = control_tubes.get(index) {
2144                 wait_ctx
2145                     .modify(tube, EventType::Read, Token::VmControl { index })
2146                     .context("failed to modify descriptor in wait context")?;
2147             }
2148         }
2149     }
2150 
2151     vcpu::kick_all_vcpus(
2152         &vcpu_handles,
2153         linux.irq_chip.as_irq_chip(),
2154         VcpuControl::RunState(VmRunMode::Exiting),
2155     );
2156     for (handle, _) in vcpu_handles {
2157         if let Err(e) = handle.join() {
2158             error!("failed to join vcpu thread: {:?}", e);
2159         }
2160     }
2161 
2162     // Explicitly drop the VM structure here to allow the devices to clean up before the
2163     // control sockets are closed when this function exits.
2164     mem::drop(linux);
2165 
2166     stdin()
2167         .set_canon_mode()
2168         .expect("failed to restore canonical mode for terminal");
2169 
2170     Ok(exit_state)
2171 }
2172 
2173 #[cfg(test)]
2174 mod tests {
2175     use super::*;
2176     use std::path::PathBuf;
2177 
2178     // Create a file-backed mapping parameters struct with the given `address` and `size` and other
2179     // parameters set to default values.
2180     fn test_file_backed_mapping(address: u64, size: u64) -> FileBackedMappingParameters {
2181         FileBackedMappingParameters {
2182             address,
2183             size,
2184             path: PathBuf::new(),
2185             offset: 0,
2186             writable: false,
2187             sync: false,
2188         }
2189     }
2190 
2191     #[test]
2192     fn guest_mem_file_backed_mappings_overlap() {
2193         // Base case: no file mappings; output layout should be identical.
2194         assert_eq!(
2195             punch_holes_in_guest_mem_layout_for_mappings(
2196                 vec![
2197                     (GuestAddress(0), 0xD000_0000),
2198                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2199                 ],
2200                 &[]
2201             ),
2202             vec![
2203                 (GuestAddress(0), 0xD000_0000),
2204                 (GuestAddress(0x1_0000_0000), 0x8_0000),
2205             ]
2206         );
2207 
2208         // File mapping that does not overlap guest memory.
2209         assert_eq!(
2210             punch_holes_in_guest_mem_layout_for_mappings(
2211                 vec![
2212                     (GuestAddress(0), 0xD000_0000),
2213                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2214                 ],
2215                 &[test_file_backed_mapping(0xD000_0000, 0x1000)]
2216             ),
2217             vec![
2218                 (GuestAddress(0), 0xD000_0000),
2219                 (GuestAddress(0x1_0000_0000), 0x8_0000),
2220             ]
2221         );
2222 
2223         // File mapping at the start of the low address space region.
2224         assert_eq!(
2225             punch_holes_in_guest_mem_layout_for_mappings(
2226                 vec![
2227                     (GuestAddress(0), 0xD000_0000),
2228                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2229                 ],
2230                 &[test_file_backed_mapping(0, 0x2000)]
2231             ),
2232             vec![
2233                 (GuestAddress(0x2000), 0xD000_0000 - 0x2000),
2234                 (GuestAddress(0x1_0000_0000), 0x8_0000),
2235             ]
2236         );
2237 
2238         // File mapping at the end of the low address space region.
2239         assert_eq!(
2240             punch_holes_in_guest_mem_layout_for_mappings(
2241                 vec![
2242                     (GuestAddress(0), 0xD000_0000),
2243                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2244                 ],
2245                 &[test_file_backed_mapping(0xD000_0000 - 0x2000, 0x2000)]
2246             ),
2247             vec![
2248                 (GuestAddress(0), 0xD000_0000 - 0x2000),
2249                 (GuestAddress(0x1_0000_0000), 0x8_0000),
2250             ]
2251         );
2252 
2253         // File mapping fully contained within the middle of the low address space region.
2254         assert_eq!(
2255             punch_holes_in_guest_mem_layout_for_mappings(
2256                 vec![
2257                     (GuestAddress(0), 0xD000_0000),
2258                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2259                 ],
2260                 &[test_file_backed_mapping(0x1000, 0x2000)]
2261             ),
2262             vec![
2263                 (GuestAddress(0), 0x1000),
2264                 (GuestAddress(0x3000), 0xD000_0000 - 0x3000),
2265                 (GuestAddress(0x1_0000_0000), 0x8_0000),
2266             ]
2267         );
2268 
2269         // File mapping at the start of the high address space region.
2270         assert_eq!(
2271             punch_holes_in_guest_mem_layout_for_mappings(
2272                 vec![
2273                     (GuestAddress(0), 0xD000_0000),
2274                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2275                 ],
2276                 &[test_file_backed_mapping(0x1_0000_0000, 0x2000)]
2277             ),
2278             vec![
2279                 (GuestAddress(0), 0xD000_0000),
2280                 (GuestAddress(0x1_0000_2000), 0x8_0000 - 0x2000),
2281             ]
2282         );
2283 
2284         // File mapping at the end of the high address space region.
2285         assert_eq!(
2286             punch_holes_in_guest_mem_layout_for_mappings(
2287                 vec![
2288                     (GuestAddress(0), 0xD000_0000),
2289                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2290                 ],
2291                 &[test_file_backed_mapping(0x1_0008_0000 - 0x2000, 0x2000)]
2292             ),
2293             vec![
2294                 (GuestAddress(0), 0xD000_0000),
2295                 (GuestAddress(0x1_0000_0000), 0x8_0000 - 0x2000),
2296             ]
2297         );
2298 
2299         // File mapping fully contained within the middle of the high address space region.
2300         assert_eq!(
2301             punch_holes_in_guest_mem_layout_for_mappings(
2302                 vec![
2303                     (GuestAddress(0), 0xD000_0000),
2304                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2305                 ],
2306                 &[test_file_backed_mapping(0x1_0000_1000, 0x2000)]
2307             ),
2308             vec![
2309                 (GuestAddress(0), 0xD000_0000),
2310                 (GuestAddress(0x1_0000_0000), 0x1000),
2311                 (GuestAddress(0x1_0000_3000), 0x8_0000 - 0x3000),
2312             ]
2313         );
2314 
2315         // File mapping overlapping two guest memory regions.
2316         assert_eq!(
2317             punch_holes_in_guest_mem_layout_for_mappings(
2318                 vec![
2319                     (GuestAddress(0), 0xD000_0000),
2320                     (GuestAddress(0x1_0000_0000), 0x8_0000),
2321                 ],
2322                 &[test_file_backed_mapping(0xA000_0000, 0x60002000)]
2323             ),
2324             vec![
2325                 (GuestAddress(0), 0xA000_0000),
2326                 (GuestAddress(0x1_0000_2000), 0x8_0000 - 0x2000),
2327             ]
2328         );
2329     }
2330 }
2331