1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #[cfg(target_os = "android")]
6 mod android;
7 pub mod cmdline;
8 pub mod config;
9 mod device_helpers;
10 #[cfg(feature = "gpu")]
11 pub(crate) mod gpu;
12 mod vcpu;
13
14 use std::cmp::max;
15 use std::cmp::Reverse;
16 use std::collections::BTreeMap;
17 use std::collections::BTreeSet;
18 use std::collections::HashMap;
19 use std::collections::HashSet;
20 use std::convert::TryInto;
21 use std::ffi::CString;
22 use std::fs::File;
23 use std::fs::OpenOptions;
24 use std::hash::Hash;
25 use std::io::prelude::*;
26 use std::io::stdin;
27 use std::iter;
28 use std::mem;
29 use std::ops::RangeInclusive;
30 use std::os::unix::prelude::OpenOptionsExt;
31 use std::os::unix::process::ExitStatusExt;
32 use std::path::Path;
33 use std::process;
34 use std::rc::Rc;
35 use std::sync::mpsc;
36 use std::sync::Arc;
37 use std::sync::Barrier;
38 #[cfg(feature = "balloon")]
39 use std::time::Duration;
40
41 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
42 use aarch64::AArch64 as Arch;
43 use acpi_tables::sdt::SDT;
44 use anyhow::anyhow;
45 use anyhow::bail;
46 use anyhow::Context;
47 use anyhow::Result;
48 use arch::LinuxArch;
49 use arch::RunnableLinuxVm;
50 use arch::VcpuAffinity;
51 use arch::VirtioDeviceStub;
52 use arch::VmComponents;
53 use arch::VmImage;
54 use base::ReadNotifier;
55 #[cfg(feature = "balloon")]
56 use base::UnixSeqpacket;
57 use base::UnixSeqpacketListener;
58 use base::UnlinkUnixSeqpacketListener;
59 use base::*;
60 use cros_async::Executor;
61 use device_helpers::*;
62 use devices::create_devices_worker_thread;
63 use devices::serial_device::SerialHardware;
64 use devices::vfio::VfioCommonSetup;
65 use devices::vfio::VfioCommonTrait;
66 #[cfg(feature = "gpu")]
67 use devices::virtio;
68 use devices::virtio::device_constants::video::VideoDeviceType;
69 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
70 use devices::virtio::memory_mapper::MemoryMapper;
71 use devices::virtio::memory_mapper::MemoryMapperTrait;
72 use devices::virtio::vhost::user::VhostUserListener;
73 use devices::virtio::vhost::user::VhostUserListenerTrait;
74 #[cfg(feature = "balloon")]
75 use devices::virtio::BalloonFeatures;
76 #[cfg(feature = "balloon")]
77 use devices::virtio::BalloonMode;
78 #[cfg(feature = "gpu")]
79 use devices::virtio::EventDevice;
80 use devices::virtio::VirtioTransportType;
81 #[cfg(feature = "audio")]
82 use devices::Ac97Dev;
83 use devices::Bus;
84 use devices::BusDeviceObj;
85 use devices::CoIommuDev;
86 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
87 #[cfg(feature = "geniezone")]
88 use devices::GeniezoneKernelIrqChip;
89 #[cfg(feature = "usb")]
90 use devices::HostBackendDeviceProvider;
91 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
92 use devices::HostHotPlugKey;
93 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
94 use devices::HotPlugBus;
95 use devices::IommuDevType;
96 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
97 use devices::IrqChipAArch64 as IrqChipArch;
98 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
99 use devices::IrqChipX86_64 as IrqChipArch;
100 use devices::IrqEventIndex;
101 use devices::IrqEventSource;
102 use devices::KvmKernelIrqChip;
103 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
104 use devices::KvmSplitIrqChip;
105 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
106 use devices::PciAddress;
107 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
108 use devices::PciBridge;
109 use devices::PciDevice;
110 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
111 use devices::PciRoot;
112 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
113 use devices::PciRootCommand;
114 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
115 use devices::PcieDownstreamPort;
116 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
117 use devices::PcieHostPort;
118 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
119 use devices::PcieRootPort;
120 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
121 use devices::PcieUpstreamPort;
122 use devices::PvPanicCode;
123 use devices::PvPanicPciDevice;
124 use devices::StubPciDevice;
125 use devices::VirtioMmioDevice;
126 use devices::VirtioPciDevice;
127 #[cfg(feature = "usb")]
128 use devices::XhciController;
129 #[cfg(feature = "gpu")]
130 use gpu::*;
131 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
132 #[cfg(feature = "geniezone")]
133 use hypervisor::geniezone::Geniezone;
134 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
135 #[cfg(feature = "geniezone")]
136 use hypervisor::geniezone::GeniezoneVcpu;
137 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
138 #[cfg(feature = "geniezone")]
139 use hypervisor::geniezone::GeniezoneVm;
140 use hypervisor::kvm::Kvm;
141 use hypervisor::kvm::KvmVcpu;
142 use hypervisor::kvm::KvmVm;
143 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
144 use hypervisor::CpuConfigX86_64;
145 use hypervisor::Hypervisor;
146 use hypervisor::HypervisorCap;
147 use hypervisor::ProtectionType;
148 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
149 use hypervisor::VcpuAArch64 as VcpuArch;
150 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
151 use hypervisor::VcpuX86_64 as VcpuArch;
152 use hypervisor::Vm;
153 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
154 use hypervisor::VmAArch64 as VmArch;
155 use hypervisor::VmCap;
156 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
157 use hypervisor::VmX86_64 as VmArch;
158 use jail::*;
159 use libc;
160 use minijail::Minijail;
161 use resources::AddressRange;
162 use resources::Alloc;
163 #[cfg(feature = "direct")]
164 use resources::Error as ResourceError;
165 use resources::SystemAllocator;
166 use rutabaga_gfx::RutabagaGralloc;
167 use serde::Serialize;
168 use smallvec::SmallVec;
169 #[cfg(feature = "swap")]
170 use swap::SwapController;
171 use sync::Condvar;
172 use sync::Mutex;
173 use vm_control::*;
174 use vm_memory::GuestAddress;
175 use vm_memory::GuestMemory;
176 use vm_memory::MemoryPolicy;
177 use vm_memory::MemoryRegionOptions;
178 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
179 use x86_64::msr::get_override_msr_list;
180 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
181 use x86_64::X8664arch as Arch;
182
183 use crate::crosvm::config::Config;
184 use crate::crosvm::config::Executable;
185 use crate::crosvm::config::FileBackedMappingParameters;
186 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
187 use crate::crosvm::config::HostPcieRootPortParameters;
188 use crate::crosvm::config::HypervisorKind;
189 use crate::crosvm::config::SharedDir;
190 use crate::crosvm::config::SharedDirKind;
191 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
192 use crate::crosvm::gdb::gdb_thread;
193 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
194 use crate::crosvm::gdb::GdbStub;
195 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
196 use crate::crosvm::ratelimit::Ratelimit;
197 use crate::crosvm::sys::cmdline::DevicesCommand;
198
199 const KVM_PATH: &str = "/dev/kvm";
200 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
201 #[cfg(feature = "geniezone")]
202 const GENIEZONE_PATH: &str = "/dev/gzvm";
203 #[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), feature = "gunyah"))]
204 static GUNYAH_PATH: &str = "/dev/gunyah";
205
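/// Builds the virtio devices requested by `cfg` (wayland, gpu, input, block, balloon, net,
/// pmem, fs/9p, vsock, sound, video, various vhost-user devices, ...), returning one
/// `VirtioDeviceStub` (boxed device plus optional minijail) per device.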
206 fn create_virtio_devices(
207 cfg: &Config,
208 vm: &mut impl Vm,
209 resources: &mut SystemAllocator,
210 #[cfg_attr(not(feature = "gpu"), allow(unused_variables))] vm_evt_wrtube: &SendTube,
211 #[cfg(feature = "balloon")] balloon_device_tube: Option<Tube>,
212 #[cfg(feature = "balloon")] balloon_wss_device_tube: Option<Tube>,
213 #[cfg(feature = "balloon")] balloon_inflate_tube: Option<Tube>,
214 #[cfg(feature = "balloon")] init_balloon_size: u64,
215 disk_device_tubes: &mut Vec<Tube>,
216 pmem_device_tubes: &mut Vec<Tube>,
217 fs_device_tubes: &mut Vec<Tube>,
218 #[cfg(feature = "gpu")] gpu_control_tube: Tube,
219 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
220 vvu_proxy_device_tubes: &mut Vec<Tube>,
221 vvu_proxy_max_sibling_mem_size: u64,
222 #[cfg_attr(not(feature = "balloon"), allow(unused_variables))] registered_evt_q: &SendTube,
223 ) -> DeviceResult<Vec<VirtioDeviceStub>> {
224 let mut devs = Vec::new();
225
226 for opt in &cfg.vhost_user_gpu {
227 devs.push(create_vhost_user_gpu_device(cfg.protection_type, opt)?);
228 }
229
230 for opt in &cfg.vvu_proxy {
231 devs.push(create_vvu_proxy_device(
232 cfg.protection_type,
233 &cfg.jail_config,
234 opt,
235 vvu_proxy_device_tubes.remove(0),
236 vvu_proxy_max_sibling_mem_size,
237 )?);
238 }
239
240 #[cfg(any(feature = "gpu", feature = "video-decoder", feature = "video-encoder"))]
241 let mut resource_bridges = Vec::<Tube>::new();
242
243 if !cfg.wayland_socket_paths.is_empty() {
244 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
245 let mut wl_resource_bridge = None::<Tube>;
246
247 #[cfg(feature = "gpu")]
248 {
249 if cfg.gpu_parameters.is_some() {
250 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
251 resource_bridges.push(gpu_socket);
252 wl_resource_bridge = Some(wl_socket);
253 }
254 }
255
256 devs.push(create_wayland_device(
257 cfg.protection_type,
258 &cfg.jail_config,
259 &cfg.wayland_socket_paths,
260 wl_resource_bridge,
261 )?);
262 }
263
264 #[cfg(feature = "video-decoder")]
265 let video_dec_cfg = cfg
266 .video_dec
267 .iter()
268 .map(|config| {
269 let (video_tube, gpu_tube) =
270 Tube::pair().expect("failed to create tube for video decoder");
271 resource_bridges.push(gpu_tube);
272 (video_tube, config.backend)
273 })
274 .collect::<Vec<_>>();
275
276 #[cfg(feature = "video-encoder")]
277 let video_enc_cfg = cfg
278 .video_enc
279 .iter()
280 .map(|config| {
281 let (video_tube, gpu_tube) =
282 Tube::pair().expect("failed to create tube for video encoder");
283 resource_bridges.push(gpu_tube);
284 (video_tube, config.backend)
285 })
286 .collect::<Vec<_>>();
287
288 #[cfg(feature = "gpu")]
289 {
290 if let Some(gpu_parameters) = &cfg.gpu_parameters {
291 let display_param = if gpu_parameters.display_params.is_empty() {
292 Default::default()
293 } else {
294 gpu_parameters.display_params[0].clone()
295 };
296 let (gpu_display_w, gpu_display_h) = display_param.get_virtual_display_size();
297
298 let mut event_devices = Vec::new();
299 if cfg.display_window_mouse {
300 let (event_device_socket, virtio_dev_socket) =
301 StreamChannel::pair(BlockingMode::Nonblocking, FramingMode::Byte)
302 .context("failed to create socket")?;
303 let (multi_touch_width, multi_touch_height) = cfg
304 .virtio_multi_touch
305 .first()
306 .as_ref()
307 .map(|multi_touch_spec| multi_touch_spec.get_size())
308 .unwrap_or((gpu_display_w, gpu_display_h));
309 let dev = virtio::new_multi_touch(
310 // u32::MAX is the least likely to collide with the indices generated above for
311 // the multi_touch options, which begin at 0.
312 u32::MAX,
313 virtio_dev_socket,
314 multi_touch_width,
315 multi_touch_height,
316 virtio::base_features(cfg.protection_type),
317 )
318 .context("failed to set up mouse device")?;
319 devs.push(VirtioDeviceStub {
320 dev: Box::new(dev),
321 jail: simple_jail(&cfg.jail_config, "input_device")?,
322 });
323 event_devices.push(EventDevice::touchscreen(event_device_socket));
324 }
325 if cfg.display_window_keyboard {
326 let (event_device_socket, virtio_dev_socket) =
327 StreamChannel::pair(BlockingMode::Nonblocking, FramingMode::Byte)
328 .context("failed to create socket")?;
329 let dev = virtio::new_keyboard(
330 // u32::MAX is the least likely to collide with the indices generated above for
331 // the multi_touch options, which begin at 0.
332 u32::MAX,
333 virtio_dev_socket,
334 virtio::base_features(cfg.protection_type),
335 )
336 .context("failed to set up keyboard device")?;
337 devs.push(VirtioDeviceStub {
338 dev: Box::new(dev),
339 jail: simple_jail(&cfg.jail_config, "input_device")?,
340 });
341 event_devices.push(EventDevice::keyboard(event_device_socket));
342 }
343
344 devs.push(create_gpu_device(
345 cfg,
346 vm_evt_wrtube,
347 gpu_control_tube,
348 resource_bridges,
349 // Use the unnamed socket for GPU display screens.
350 cfg.wayland_socket_paths.get(""),
351 cfg.x_display.clone(),
352 render_server_fd,
353 event_devices,
354 )?);
355 }
356 }
357
358 for (_, param) in cfg
359 .serial_parameters
360 .iter()
361 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
362 {
363 let dev = param.create_virtio_device_and_jail(cfg.protection_type, &cfg.jail_config)?;
364 devs.push(dev);
365 }
366
367 for disk in &cfg.disks {
368 let disk_config = DiskConfig::new(disk, Some(disk_device_tubes.remove(0)));
369 devs.push(
370 disk_config.create_virtio_device_and_jail(cfg.protection_type, &cfg.jail_config)?,
371 );
372 }
373
374 for blk in &cfg.vhost_user_blk {
375 devs.push(create_vhost_user_block_device(cfg.protection_type, blk)?);
376 }
377
378 for console in &cfg.vhost_user_console {
379 devs.push(create_vhost_user_console_device(
380 cfg.protection_type,
381 console,
382 )?);
383 }
384
385 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
386 let pmem_device_tube = pmem_device_tubes.remove(0);
387 devs.push(create_pmem_device(
388 cfg.protection_type,
389 &cfg.jail_config,
390 vm,
391 resources,
392 pmem_disk,
393 index,
394 pmem_device_tube,
395 )?);
396 }
397
398 if cfg.rng {
399 devs.push(create_rng_device(cfg.protection_type, &cfg.jail_config)?);
400 }
401
402 #[cfg(feature = "tpm")]
403 {
404 if cfg.software_tpm {
405 devs.push(create_software_tpm_device(
406 cfg.protection_type,
407 &cfg.jail_config,
408 )?);
409 }
410 }
411
412 #[cfg(all(feature = "vtpm", target_arch = "x86_64"))]
413 {
414 if cfg.vtpm_proxy {
415 devs.push(create_vtpm_proxy_device(
416 cfg.protection_type,
417 &cfg.jail_config,
418 )?);
419 }
420 }
421
422 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
423 devs.push(create_single_touch_device(
424 cfg.protection_type,
425 &cfg.jail_config,
426 single_touch_spec,
427 idx as u32,
428 )?);
429 }
430
431 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
432 devs.push(create_multi_touch_device(
433 cfg.protection_type,
434 &cfg.jail_config,
435 multi_touch_spec,
436 idx as u32,
437 )?);
438 }
439
440 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
441 devs.push(create_trackpad_device(
442 cfg.protection_type,
443 &cfg.jail_config,
444 trackpad_spec,
445 idx as u32,
446 )?);
447 }
448
449 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
450 devs.push(create_mouse_device(
451 cfg.protection_type,
452 &cfg.jail_config,
453 mouse_socket,
454 idx as u32,
455 )?);
456 }
457
458 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
459 devs.push(create_keyboard_device(
460 cfg.protection_type,
461 &cfg.jail_config,
462 keyboard_socket,
463 idx as u32,
464 )?);
465 }
466
467 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
468 devs.push(create_switches_device(
469 cfg.protection_type,
470 &cfg.jail_config,
471 switches_socket,
472 idx as u32,
473 )?);
474 }
475
476 for dev_path in &cfg.virtio_input_evdevs {
477 devs.push(create_vinput_device(
478 cfg.protection_type,
479 &cfg.jail_config,
480 dev_path,
481 )?);
482 }
483
484 #[cfg(feature = "balloon")]
485 if let Some(balloon_device_tube) = balloon_device_tube {
486 let balloon_features = (cfg.balloon_page_reporting as u64)
487 << BalloonFeatures::PageReporting as u64
488 | (cfg.balloon_wss_reporting as u64) << BalloonFeatures::WSSReporting as u64;
489 devs.push(create_balloon_device(
490 cfg.protection_type,
491 &cfg.jail_config,
492 if cfg.strict_balloon {
493 BalloonMode::Strict
494 } else {
495 BalloonMode::Relaxed
496 },
497 balloon_device_tube,
498 balloon_wss_device_tube,
499 balloon_inflate_tube,
500 init_balloon_size,
501 balloon_features,
502 Some(
503 registered_evt_q
504 .try_clone()
505 .context("failed to clone registered_evt_q tube")?,
506 ),
507 )?);
508 }
509
510 for opt in &cfg.net {
511 let vq_pairs = opt.vq_pairs.unwrap_or(1);
512 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
513 let multi_vq = vq_pairs > 1 && !opt.vhost_net;
514 let (tap, mac) = create_tap_for_net_device(&opt.mode, multi_vq)?;
515 let dev = if opt.vhost_net {
516 create_virtio_vhost_net_device_from_tap(
517 cfg.protection_type,
518 &cfg.jail_config,
519 vq_pairs,
520 vcpu_count,
521 cfg.vhost_net_device_path.clone(),
522 tap,
523 mac,
524 )
525 } else {
526 create_virtio_net_device_from_tap(
527 cfg.protection_type,
528 &cfg.jail_config,
529 vq_pairs,
530 vcpu_count,
531 tap,
532 mac,
533 )
534 }?;
535 devs.push(dev);
536 }
537
538 for net in &cfg.vhost_user_net {
539 devs.push(create_vhost_user_net_device(cfg.protection_type, net)?);
540 }
541
542 for vsock in &cfg.vhost_user_vsock {
543 devs.push(create_vhost_user_vsock_device(cfg.protection_type, vsock)?);
544 }
545
546 for opt in &cfg.vhost_user_wl {
547 devs.push(create_vhost_user_wl_device(cfg.protection_type, opt)?);
548 }
549
550 #[cfg(feature = "audio")]
551 {
552 for virtio_snd in &cfg.virtio_snds {
553 devs.push(create_virtio_snd_device(
554 cfg.protection_type,
555 &cfg.jail_config,
556 virtio_snd.clone(),
557 )?);
558 }
559 }
560
561 #[cfg(feature = "video-decoder")]
562 {
563 for (tube, backend) in video_dec_cfg {
564 register_video_device(
565 backend,
566 &mut devs,
567 tube,
568 cfg.protection_type,
569 &cfg.jail_config,
570 VideoDeviceType::Decoder,
571 )?;
572 }
573 }
574 for socket_path in &cfg.vhost_user_video_dec {
575 devs.push(create_vhost_user_video_device(
576 cfg.protection_type,
577 socket_path,
578 VideoDeviceType::Decoder,
579 )?);
580 }
581
582 #[cfg(feature = "video-encoder")]
583 {
584 for (tube, backend) in video_enc_cfg {
585 register_video_device(
586 backend,
587 &mut devs,
588 tube,
589 cfg.protection_type,
590 &cfg.jail_config,
591 VideoDeviceType::Encoder,
592 )?;
593 }
594 }
595
596 if let Some(vsock_config) = &cfg.vsock {
597 devs.push(
598 vsock_config.create_virtio_device_and_jail(cfg.protection_type, &cfg.jail_config)?,
599 );
600 }
601
602 for vhost_user_fs in &cfg.vhost_user_fs {
603 devs.push(create_vhost_user_fs_device(
604 cfg.protection_type,
605 vhost_user_fs,
606 )?);
607 }
608
609 for vhost_user_snd in &cfg.vhost_user_snd {
610 devs.push(create_vhost_user_snd_device(
611 cfg.protection_type,
612 vhost_user_snd,
613 )?);
614 }
615
616 for shared_dir in &cfg.shared_dirs {
617 let SharedDir {
618 src,
619 tag,
620 kind,
621 uid_map,
622 gid_map,
623 fs_cfg,
624 p9_cfg,
625 } = shared_dir;
626
627 let dev = match kind {
628 SharedDirKind::FS => {
629 let device_tube = fs_device_tubes.remove(0);
630 create_fs_device(
631 cfg.protection_type,
632 &cfg.jail_config,
633 uid_map,
634 gid_map,
635 src,
636 tag,
637 fs_cfg.clone(),
638 device_tube,
639 )?
640 }
641 SharedDirKind::P9 => create_9p_device(
642 cfg.protection_type,
643 &cfg.jail_config,
644 uid_map,
645 gid_map,
646 src,
647 tag,
648 p9_cfg.clone(),
649 )?,
650 };
651 devs.push(dev);
652 }
653
654 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
655 devs.push(create_vhost_user_mac80211_hwsim_device(
656 cfg.protection_type,
657 vhost_user_mac80211_hwsim,
658 )?);
659 }
660
661 #[cfg(feature = "audio")]
662 if let Some(path) = &cfg.sound {
663 devs.push(create_sound_device(
664 path,
665 cfg.protection_type,
666 &cfg.jail_config,
667 )?);
668 }
669
670 Ok(devs)
671 }
672
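/// Creates every guest device: VFIO and coiommu devices, the virtio devices from
/// `create_virtio_devices` wrapped in their PCI or MMIO transport, and the remaining PCI
/// devices (AC97, xHCI, stub PCI, pvpanic), each paired with its optional minijail.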
673 fn create_devices(
674 cfg: &Config,
675 vm: &mut impl Vm,
676 resources: &mut SystemAllocator,
677 vm_evt_wrtube: &SendTube,
678 iommu_attached_endpoints: &mut BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
679 irq_control_tubes: &mut Vec<Tube>,
680 control_tubes: &mut Vec<TaggedControlTube>,
681 #[cfg(feature = "balloon")] balloon_device_tube: Option<Tube>,
682 #[cfg(feature = "balloon")] balloon_wss_device_tube: Option<Tube>,
683 #[cfg(feature = "balloon")] init_balloon_size: u64,
684 disk_device_tubes: &mut Vec<Tube>,
685 pmem_device_tubes: &mut Vec<Tube>,
686 fs_device_tubes: &mut Vec<Tube>,
687 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
688 #[cfg(feature = "gpu")] gpu_control_tube: Tube,
689 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
690 vvu_proxy_device_tubes: &mut Vec<Tube>,
691 vvu_proxy_max_sibling_mem_size: u64,
692 iova_max_addr: &mut Option<u64>,
693 registered_evt_q: &SendTube,
694 ) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
695 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
696 #[cfg(feature = "balloon")]
697 let mut balloon_inflate_tube: Option<Tube> = None;
698 if !cfg.vfio.is_empty() {
699 let mut coiommu_attached_endpoints = Vec::new();
700
701 for vfio_dev in &cfg.vfio {
702 let (dev, jail, viommu_mapper) = create_vfio_device(
703 &cfg.jail_config,
704 vm,
705 resources,
706 irq_control_tubes,
707 control_tubes,
708 &vfio_dev.path,
709 false,
710 None,
711 vfio_dev.guest_address,
712 Some(&mut coiommu_attached_endpoints),
713 vfio_dev.iommu,
714 #[cfg(feature = "direct")]
715 vfio_dev.intel_lpss,
716 )?;
717 match dev {
718 VfioDeviceVariant::Pci(vfio_pci_device) => {
719 *iova_max_addr = Some(max(
720 vfio_pci_device.get_max_iova(),
721 iova_max_addr.unwrap_or(0),
722 ));
723
724 if let Some(viommu_mapper) = viommu_mapper {
725 iommu_attached_endpoints.insert(
726 vfio_pci_device
727 .pci_address()
728 .context("not initialized")?
729 .to_u32(),
730 Arc::new(Mutex::new(Box::new(viommu_mapper))),
731 );
732 }
733
734 devices.push((Box::new(vfio_pci_device), jail));
735 }
736 VfioDeviceVariant::Platform(vfio_plat_dev) => {
737 devices.push((Box::new(vfio_plat_dev), jail));
738 }
739 }
740 }
741
742 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
743 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
744 let res = unsafe { libc::getrlimit64(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
745 if res == 0 {
746 let limit = unsafe { buf.assume_init() };
747 let rlim_new = limit.rlim_cur.saturating_add(vm.get_memory().memory_size());
748 let rlim_max = max(limit.rlim_max, rlim_new);
749 if limit.rlim_cur < rlim_new {
750 let limit_arg = libc::rlimit64 {
751 rlim_cur: rlim_new,
752 rlim_max,
753 };
754 let res = unsafe { libc::setrlimit64(libc::RLIMIT_MEMLOCK, &limit_arg) };
755 if res != 0 {
756 bail!("Set rlimit failed");
757 }
758 }
759 } else {
760 bail!("Get rlimit failed");
761 }
762 }
763 #[cfg(feature = "balloon")]
764 let coiommu_tube: Option<Tube>;
765 #[cfg(not(feature = "balloon"))]
766 let coiommu_tube: Option<Tube> = None;
767 if !coiommu_attached_endpoints.is_empty() {
768 let vfio_container =
769 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
770 .context("failed to get vfio container")?;
771 let (coiommu_host_tube, coiommu_device_tube) =
772 Tube::pair().context("failed to create coiommu tube")?;
773 control_tubes.push(TaggedControlTube::VmMemory {
774 tube: coiommu_host_tube,
775 expose_with_viommu: false,
776 });
777 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
778 #[cfg(feature = "balloon")]
779 match Tube::pair() {
780 Ok((x, y)) => {
781 coiommu_tube = Some(x);
782 balloon_inflate_tube = Some(y);
783 }
784 Err(x) => return Err(x).context("failed to create coiommu tube"),
785 }
786 let dev = CoIommuDev::new(
787 vm.get_memory().clone(),
788 vfio_container,
789 coiommu_device_tube,
790 coiommu_tube,
791 coiommu_attached_endpoints,
792 vcpu_count,
793 cfg.coiommu_param.unwrap_or_default(),
794 )
795 .context("failed to create coiommu device")?;
796
797 devices.push((
798 Box::new(dev),
799 simple_jail(&cfg.jail_config, "coiommu_device")?,
800 ));
801 }
802 }
803
804 let stubs = create_virtio_devices(
805 cfg,
806 vm,
807 resources,
808 vm_evt_wrtube,
809 #[cfg(feature = "balloon")]
810 balloon_device_tube,
811 #[cfg(feature = "balloon")]
812 balloon_wss_device_tube,
813 #[cfg(feature = "balloon")]
814 balloon_inflate_tube,
815 #[cfg(feature = "balloon")]
816 init_balloon_size,
817 disk_device_tubes,
818 pmem_device_tubes,
819 fs_device_tubes,
820 #[cfg(feature = "gpu")]
821 gpu_control_tube,
822 #[cfg(feature = "gpu")]
823 render_server_fd,
824 vvu_proxy_device_tubes,
825 vvu_proxy_max_sibling_mem_size,
826 registered_evt_q,
827 )?;
828
829 for stub in stubs {
830 match stub.dev.transport_type() {
831 VirtioTransportType::Pci => {
832 let (msi_host_tube, msi_device_tube) =
833 Tube::pair().context("failed to create tube")?;
834 irq_control_tubes.push(msi_host_tube);
835
836 let shared_memory_tube = if stub.dev.get_shared_memory_region().is_some() {
837 let (host_tube, device_tube) =
838 Tube::pair().context("failed to create VVU proxy tube")?;
839 control_tubes.push(TaggedControlTube::VmMemory {
840 tube: host_tube,
841 expose_with_viommu: stub.dev.expose_shmem_descriptors_with_viommu(),
842 });
843 Some(device_tube)
844 } else {
845 None
846 };
847
848 let (ioevent_host_tube, ioevent_device_tube) =
849 Tube::pair().context("failed to create ioevent tube")?;
850 control_tubes.push(TaggedControlTube::VmMemory {
851 tube: ioevent_host_tube,
852 expose_with_viommu: false,
853 });
854
855 let dev = VirtioPciDevice::new(
856 vm.get_memory().clone(),
857 stub.dev,
858 msi_device_tube,
859 cfg.disable_virtio_intx,
860 shared_memory_tube,
861 ioevent_device_tube,
862 )
863 .context("failed to create virtio pci dev")?;
864
865 devices.push((Box::new(dev) as Box<dyn BusDeviceObj>, stub.jail));
866 }
867 VirtioTransportType::Mmio => {
868 let dev = VirtioMmioDevice::new(vm.get_memory().clone(), stub.dev, false)
869 .context("failed to create virtio mmio dev")?;
870 devices.push((Box::new(dev) as Box<dyn BusDeviceObj>, stub.jail));
871 }
872 }
873 }
874
875 #[cfg(feature = "audio")]
876 for ac97_param in &cfg.ac97_parameters {
877 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
878 .context("failed to create ac97 device")?;
879 let jail = simple_jail(&cfg.jail_config, dev.minijail_policy())?;
880 devices.push((Box::new(dev), jail));
881 }
882
883 #[cfg(feature = "usb")]
884 if cfg.usb {
885 // Create xhci controller.
886 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
887 devices.push((
888 usb_controller,
889 simple_jail(&cfg.jail_config, "xhci_device")?,
890 ));
891 }
892
893 for params in &cfg.stub_pci_devices {
894 // Stub devices don't need jailing since they don't do anything.
895 devices.push((Box::new(StubPciDevice::new(params)), None));
896 }
897
898 devices.push((
899 Box::new(PvPanicPciDevice::new(vm_evt_wrtube.try_clone()?)),
900 None,
901 ));
902
903 Ok(devices)
904 }
905
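/// Maps each file from `cfg.file_backed_mappings` into guest physical memory at its requested
/// address, reserving the corresponding MMIO range when it falls inside an allocatable region.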
906 fn create_file_backed_mappings(
907 cfg: &Config,
908 vm: &mut impl Vm,
909 resources: &mut SystemAllocator,
910 ) -> Result<()> {
911 for mapping in &cfg.file_backed_mappings {
912 let file = OpenOptions::new()
913 .read(true)
914 .write(mapping.writable)
915 .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
916 .open(&mapping.path)
917 .context("failed to open file for file-backed mapping")?;
918 let prot = if mapping.writable {
919 Protection::read_write()
920 } else {
921 Protection::read()
922 };
923 let size = mapping
924 .size
925 .try_into()
926 .context("Invalid size for file-backed mapping")?;
927 let memory_mapping = MemoryMappingBuilder::new(size)
928 .from_file(&file)
929 .offset(mapping.offset)
930 .protection(prot)
931 .build()
932 .context("failed to map backing file for file-backed mapping")?;
933
934 let mapping_range = AddressRange::from_start_and_size(mapping.address, mapping.size)
935 .context("failed to convert to AddressRange")?;
936 match resources.mmio_allocator_any().allocate_at(
937 mapping_range,
938 Alloc::FileBacked(mapping.address),
939 "file-backed mapping".to_owned(),
940 ) {
941 // OutOfSpace just means that this mapping is not in the MMIO regions at all, so don't
942 // consider it an error.
943 // TODO(b/222769529): Reserve this region in a global memory address space allocator once
944 // we have that so nothing else can accidentally overlap with it.
945 Ok(()) | Err(resources::Error::OutOfSpace) => {}
946 e => e.context("failed to allocate guest address for file-backed mapping")?,
947 }
948
949 vm.add_memory_region(
950 GuestAddress(mapping.address),
951 Box::new(memory_mapping),
952 !mapping.writable,
953 /* log_dirty_pages = */ false,
954 )
955 .context("failed to configure file-backed mapping")?;
956 }
957
958 Ok(())
959 }
960
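/// Creates virtual PCIe root ports. With no `host_pcie_rp` entries it adds one root port per
/// occupied non-root bus plus a single hotplug-capable port; otherwise it creates one root port
/// mirroring each host root port. Hotplug-capable ports are recorded in `hp_vec` and their
/// downstream PCI ranges in `hp_endpoints_ranges`.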
961 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
962 fn create_pcie_root_port(
963 host_pcie_rp: Vec<HostPcieRootPortParameters>,
964 sys_allocator: &mut SystemAllocator,
965 irq_control_tubes: &mut Vec<Tube>,
966 control_tubes: &mut Vec<TaggedControlTube>,
967 devices: &mut Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
968 hp_vec: &mut Vec<(u8, Arc<Mutex<dyn HotPlugBus>>)>,
969 hp_endpoints_ranges: &mut Vec<RangeInclusive<u32>>,
970 // TODO(b/228627457): clippy is incorrectly warning about this Vec, which needs to be a Vec so
971 // we can push into it
972 #[allow(clippy::ptr_arg)] gpe_notify_devs: &mut Vec<(u32, Arc<Mutex<dyn GpeNotify>>)>,
973 #[allow(clippy::ptr_arg)] pme_notify_devs: &mut Vec<(u8, Arc<Mutex<dyn PmeNotify>>)>,
974 ) -> Result<()> {
975 if host_pcie_rp.is_empty() {
976 // The user didn't specify a host PCIe root port to link to this virtual PCIe RP,
977 // so find an empty bus and create a purely virtual PCIe root port.
978 let mut hp_sec_bus = 0u8;
979 // Create a PCIe root port for each non-root bus that already has devices; each such
980 // device will be connected behind a virtual PCIe root port.
981 for i in 1..255 {
982 if sys_allocator.pci_bus_empty(i) {
983 if hp_sec_bus == 0 {
984 hp_sec_bus = i;
985 }
986 continue;
987 }
988 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(i, false)));
989 pme_notify_devs.push((i, pcie_root_port.clone() as Arc<Mutex<dyn PmeNotify>>));
990 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
991 irq_control_tubes.push(msi_host_tube);
992 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
993 // no ipc is used if the root port disables hotplug
994 devices.push((pci_bridge, None));
995 }
996
997 // Create Pcie Root Port for hot-plug
998 if hp_sec_bus == 0 {
999 return Err(anyhow!("no more addresses are available"));
1000 }
1001 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(hp_sec_bus, true)));
1002 pme_notify_devs.push((
1003 hp_sec_bus,
1004 pcie_root_port.clone() as Arc<Mutex<dyn PmeNotify>>,
1005 ));
1006 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1007 irq_control_tubes.push(msi_host_tube);
1008 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
1009
1010 hp_endpoints_ranges.push(RangeInclusive::new(
1011 PciAddress {
1012 bus: pci_bridge.get_secondary_num(),
1013 dev: 0,
1014 func: 0,
1015 }
1016 .to_u32(),
1017 PciAddress {
1018 bus: pci_bridge.get_subordinate_num(),
1019 dev: 32,
1020 func: 8,
1021 }
1022 .to_u32(),
1023 ));
1024
1025 devices.push((pci_bridge, None));
1026 hp_vec.push((hp_sec_bus, pcie_root_port as Arc<Mutex<dyn HotPlugBus>>));
1027 } else {
1028 // The user specified host PCIe root ports to link to these virtual PCIe RPs;
1029 // reserve the host PCI BDF and create a virtual PCIe RP mirroring some of the host's attributes.
1030 for host_pcie in host_pcie_rp.iter() {
1031 let (vm_host_tube, vm_device_tube) = Tube::pair().context("failed to create tube")?;
1032 let pcie_host = PcieHostPort::new(host_pcie.host_path.as_path(), vm_device_tube)?;
1033 let bus_range = pcie_host.get_bus_range();
1034 let mut slot_implemented = true;
1035 for i in bus_range.secondary..=bus_range.subordinate {
1036 // If this bus is occupied by a vfio-pci device, that device is statically
1037 // connected to a PCI bridge on the host, so it should also be statically
1038 // connected to a virtual PCI bridge in the guest; such a bridge has no
1039 // hotplug capability and doesn't use a slot.
1040 if !sys_allocator.pci_bus_empty(i) {
1041 slot_implemented = false;
1042 break;
1043 }
1044 }
1045
1046 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new_from_host(
1047 pcie_host,
1048 slot_implemented,
1049 )?));
1050 control_tubes.push(TaggedControlTube::Vm(vm_host_tube));
1051
1052 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1053 irq_control_tubes.push(msi_host_tube);
1054 let mut pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
1055 // early reservation for host pcie root port devices.
1056 let rootport_addr = pci_bridge.allocate_address(sys_allocator);
1057 if rootport_addr.is_err() {
1058 warn!(
1059 "address reservation failed for host pcie root port {}",
1060 pci_bridge.debug_label()
1061 );
1062 }
1063
1064 // Only append the sub pci range of a hot-pluggable root port to virtio-iommu
1065 if slot_implemented {
1066 hp_endpoints_ranges.push(RangeInclusive::new(
1067 PciAddress {
1068 bus: pci_bridge.get_secondary_num(),
1069 dev: 0,
1070 func: 0,
1071 }
1072 .to_u32(),
1073 PciAddress {
1074 bus: pci_bridge.get_subordinate_num(),
1075 dev: 32,
1076 func: 8,
1077 }
1078 .to_u32(),
1079 ));
1080 }
1081
1082 devices.push((pci_bridge, None));
1083 if slot_implemented {
1084 if let Some(gpe) = host_pcie.hp_gpe {
1085 gpe_notify_devs
1086 .push((gpe, pcie_root_port.clone() as Arc<Mutex<dyn GpeNotify>>));
1087 }
1088 hp_vec.push((
1089 bus_range.secondary,
1090 pcie_root_port as Arc<Mutex<dyn HotPlugBus>>,
1091 ));
1092 }
1093 }
1094 }
1095
1096 Ok(())
1097 }
1098
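/// Translates the guest-describing parts of `cfg` (kernel or BIOS image, initrd, memory size,
/// vCPU layout, pflash, pstore, ACPI tables, ...) into the `VmComponents` consumed by the
/// architecture-specific VM setup code.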
1099 fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
1100 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1101 Some(
1102 open_file(initrd_path, OpenOptions::new().read(true))
1103 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
1104 )
1105 } else {
1106 None
1107 };
1108 let pvm_fw_image = if let Some(pvm_fw_path) = &cfg.pvm_fw {
1109 Some(
1110 open_file(pvm_fw_path, OpenOptions::new().read(true))
1111 .with_context(|| format!("failed to open pvm_fw {}", pvm_fw_path.display()))?,
1112 )
1113 } else {
1114 None
1115 };
1116
1117 let vm_image = match cfg.executable_path {
1118 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1119 open_file(kernel_path, OpenOptions::new().read(true)).with_context(|| {
1120 format!("failed to open kernel image {}", kernel_path.display())
1121 })?,
1122 ),
1123 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
1124 open_file(bios_path, OpenOptions::new().read(true))
1125 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
1126 ),
1127 _ => panic!("Did not receive a bios or kernel, should be impossible."),
1128 };
1129
1130 let swiotlb = if let Some(size) = cfg.swiotlb {
1131 Some(
1132 size.checked_mul(1024 * 1024)
1133 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
1134 )
1135 } else if matches!(cfg.protection_type, ProtectionType::Unprotected) {
1136 None
1137 } else {
1138 Some(64 * 1024 * 1024)
1139 };
1140
1141 let (pflash_image, pflash_block_size) = if let Some(pflash_parameters) = &cfg.pflash_parameters
1142 {
1143 (
1144 Some(
1145 open_file(
1146 &pflash_parameters.path,
1147 OpenOptions::new().read(true).write(true),
1148 )
1149 .with_context(|| {
1150 format!("failed to open pflash {}", pflash_parameters.path.display())
1151 })?,
1152 ),
1153 pflash_parameters.block_size,
1154 )
1155 } else {
1156 (None, 0)
1157 };
1158
1159 Ok(VmComponents {
1160 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1161 ac_adapter: cfg.ac_adapter,
1162 memory_size: cfg
1163 .memory
1164 .unwrap_or(256)
1165 .checked_mul(1024 * 1024)
1166 .ok_or_else(|| anyhow!("requested memory size too large"))?,
1167 swiotlb,
1168 vcpu_count: cfg.vcpu_count.unwrap_or(1),
1169 vcpu_affinity: cfg.vcpu_affinity.clone(),
1170 cpu_clusters: cfg.cpu_clusters.clone(),
1171 cpu_capacity: cfg.cpu_capacity.clone(),
1172 #[cfg(feature = "direct")]
1173 direct_gpe: cfg.direct_gpe.clone(),
1174 #[cfg(feature = "direct")]
1175 direct_fixed_evts: cfg.direct_fixed_evts.clone(),
1176 no_smt: cfg.no_smt,
1177 hugepages: cfg.hugepages,
1178 hv_cfg: hypervisor::Config {
1179 #[cfg(target_arch = "aarch64")]
1180 mte: cfg.mte,
1181 protection_type: cfg.protection_type,
1182 },
1183 vm_image,
1184 android_fstab: cfg
1185 .android_fstab
1186 .as_ref()
1187 .map(|x| {
1188 File::open(x)
1189 .with_context(|| format!("failed to open android fstab file {}", x.display()))
1190 })
1191 .map_or(Ok(None), |v| v.map(Some))?,
1192 pstore: cfg.pstore.clone(),
1193 pflash_block_size,
1194 pflash_image,
1195 initrd_image,
1196 extra_kernel_params: cfg.params.clone(),
1197 acpi_sdts: cfg
1198 .acpi_tables
1199 .iter()
1200 .map(|path| {
1201 SDT::from_file(path)
1202 .with_context(|| format!("failed to open ACPI file {}", path.display()))
1203 })
1204 .collect::<Result<Vec<SDT>>>()?,
1205 rt_cpus: cfg.rt_cpus.clone(),
1206 delay_rt: cfg.delay_rt,
1207 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
1208 gdb: None,
1209 dmi_path: cfg.dmi_path.clone(),
1210 no_i8042: cfg.no_i8042,
1211 no_rtc: cfg.no_rtc,
1212 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1213 oem_strings: cfg.oem_strings.clone(),
1214 host_cpu_topology: cfg.host_cpu_topology,
1215 itmt: cfg.itmt,
1216 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1217 force_s2idle: cfg.force_s2idle,
1218 pvm_fw: pvm_fw_image,
1219 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1220 pcie_ecam: cfg.pcie_ecam,
1221 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1222 pci_low_start: cfg.pci_low_start,
1223 })
1224 }
1225
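/// The reason the VM stopped running, as returned by `run_config`.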
1226 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
1227 pub enum ExitState {
1228 Reset,
1229 Stop,
1230 Crash,
1231 GuestPanic,
1232 WatchdogReset,
1233 }
1234 // Remove ranges in `guest_mem_layout` that overlap with ranges in `file_backed_mappings`.
1235 // Returns the updated guest memory layout.
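// For example, a single region (0x0, 0x8000_0000) with a file-backed mapping at address 0x1000
// of size 0x1000 becomes two regions: (0x0, 0x1000) and (0x2000, 0x7fff_e000).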
1236 fn punch_holes_in_guest_mem_layout_for_mappings(
1237 guest_mem_layout: Vec<(GuestAddress, u64, MemoryRegionOptions)>,
1238 file_backed_mappings: &[FileBackedMappingParameters],
1239 ) -> Vec<(GuestAddress, u64, MemoryRegionOptions)> {
1240 // Create a set containing (start, end) pairs with exclusive end (end = start + size; the byte
1241 // at end is not included in the range).
1242 let mut layout_set = BTreeSet::new();
1243 for (addr, size, options) in &guest_mem_layout {
1244 layout_set.insert((addr.offset(), addr.offset() + size, *options));
1245 }
1246
1247 for mapping in file_backed_mappings {
1248 let mapping_start = mapping.address;
1249 let mapping_end = mapping_start + mapping.size;
1250
1251 // Repeatedly split overlapping guest memory regions until no overlaps remain.
1252 while let Some((range_start, range_end, options)) = layout_set
1253 .iter()
1254 .find(|&&(range_start, range_end, _)| {
1255 mapping_start < range_end && mapping_end > range_start
1256 })
1257 .cloned()
1258 {
1259 layout_set.remove(&(range_start, range_end, options));
1260
1261 if range_start < mapping_start {
1262 layout_set.insert((range_start, mapping_start, options));
1263 }
1264 if range_end > mapping_end {
1265 layout_set.insert((mapping_end, range_end, options));
1266 }
1267 }
1268 }
1269
1270 // Build the final guest memory layout from the modified layout_set.
1271 layout_set
1272 .iter()
1273 .map(|(start, end, options)| (GuestAddress(*start), end - start, *options))
1274 .collect()
1275 }
1276
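/// Builds the guest memory from the architecture's layout, punching holes for file-backed
/// mappings and applying the hugepage, lock-memory, and dontfork policies requested by `cfg`.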
1277 fn create_guest_memory(
1278 cfg: &Config,
1279 components: &VmComponents,
1280 hypervisor: &impl Hypervisor,
1281 ) -> Result<GuestMemory> {
1282 let guest_mem_layout = Arch::guest_memory_layout(components, hypervisor)
1283 .context("failed to create guest memory layout")?;
1284
1285 let guest_mem_layout =
1286 punch_holes_in_guest_mem_layout_for_mappings(guest_mem_layout, &cfg.file_backed_mappings);
1287
1288 let guest_mem = GuestMemory::new_with_options(&guest_mem_layout)
1289 .context("failed to create guest memory")?;
1290 let mut mem_policy = MemoryPolicy::empty();
1291 if components.hugepages {
1292 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
1293 }
1294
1295 if cfg.lock_guest_memory {
1296 mem_policy |= MemoryPolicy::LOCK_GUEST_MEMORY;
1297 }
1298 guest_mem.set_memory_policy(mem_policy);
1299
1300 if cfg.unmap_guest_memory_on_fork {
1301 // Note that this isn't compatible with sandboxing. We could potentially fix that by
1302 // delaying the call until after the sandboxed devices are forked. However, the main use
1303 // for this is in conjunction with protected VMs, where most of the guest memory has been
1304 // unshared with the host. We'd need to be confident that the guest memory is unshared with
1305 // the host only after the `use_dontfork` call and those details will vary by hypervisor.
1306 // So, for now we keep things simple to be safe.
1307 guest_mem.use_dontfork().context("use_dontfork failed")?;
1308 }
1309
1310 Ok(guest_mem)
1311 }
1312
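/// Opens the GenieZone device, creates the VM and its in-kernel IRQ chip, and hands off to
/// `run_vm`.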
1313 #[cfg(any(target_arch = "aarch64"))]
1314 #[cfg(feature = "geniezone")]
1315 fn run_gz(device_path: Option<&Path>, cfg: Config, components: VmComponents) -> Result<ExitState> {
1316 let device_path = device_path.unwrap_or(Path::new(GENIEZONE_PATH));
1317 let gzvm = Geniezone::new_with_path(device_path)
1318 .with_context(|| format!("failed to open GenieZone device {}", device_path.display()))?;
1319
1320 let guest_mem = create_guest_memory(&cfg, &components, &gzvm)?;
1321
1322 #[cfg(feature = "swap")]
1323 let swap_controller = if let Some(swap_dir) = cfg.swap_dir.as_ref() {
1324 Some(
1325 SwapController::launch(guest_mem.clone(), swap_dir, &cfg.jail_config)
1326 .context("launch vmm-swap monitor process")?,
1327 )
1328 } else {
1329 None
1330 };
1331
1332 let vm =
1333 GeniezoneVm::new(&gzvm, guest_mem, components.hv_cfg).context("failed to create vm")?;
1334
1335 // Check that the VM was actually created in protected mode as expected.
1336 if cfg.protection_type.isolates_memory() && !vm.check_capability(VmCap::Protected) {
1337 bail!("Failed to create protected VM");
1338 }
1339 let vm_clone = vm.try_clone().context("failed to clone vm")?;
1340
1341 let ioapic_host_tube;
1342 let mut irq_chip = if cfg.split_irqchip {
1343 unimplemented!("Geniezone does not support split irqchip mode");
1344 } else {
1345 ioapic_host_tube = None;
1346
1347 GeniezoneKernelIrqChip::new(vm_clone, components.vcpu_count)
1348 .context("failed to create IRQ chip")?
1349 };
1350
1351 run_vm::<GeniezoneVcpu, GeniezoneVm>(
1352 cfg,
1353 components,
1354 vm,
1355 &mut irq_chip,
1356 ioapic_host_tube,
1357 #[cfg(feature = "swap")]
1358 swap_controller,
1359 )
1360 }
1361
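/// Opens the KVM device, creates the VM, sets up MSR handling, chooses a split or in-kernel IRQ
/// chip, and hands off to `run_vm`.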
1362 fn run_kvm(device_path: Option<&Path>, cfg: Config, components: VmComponents) -> Result<ExitState> {
1363 let device_path = device_path.unwrap_or(Path::new(KVM_PATH));
1364 let kvm = Kvm::new_with_path(device_path)
1365 .with_context(|| format!("failed to open KVM device {}", device_path.display()))?;
1366
1367 let guest_mem = create_guest_memory(&cfg, &components, &kvm)?;
1368
1369 #[cfg(feature = "swap")]
1370 let swap_controller = if let Some(swap_dir) = cfg.swap_dir.as_ref() {
1371 Some(
1372 SwapController::launch(guest_mem.clone(), swap_dir, &cfg.jail_config)
1373 .context("launch vmm-swap monitor process")?,
1374 )
1375 } else {
1376 None
1377 };
1378
1379 let vm = KvmVm::new(&kvm, guest_mem, components.hv_cfg).context("failed to create vm")?;
1380
1381 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1382 if cfg.itmt {
1383 vm.set_platform_info_read_access(false)
1384 .context("failed to disable MSR_PLATFORM_INFO read access")?;
1385 }
1386
1387 if !cfg.userspace_msr.is_empty() {
1388 vm.enable_userspace_msr()
1389 .context("failed to enable userspace MSR handling, do you have kernel 5.10 or later")?;
1390 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1391 {
1392 let msr_list = get_override_msr_list(&cfg.userspace_msr);
1393 vm.set_msr_filter(msr_list)
1394 .context("failed to set msr filter")?;
1395 }
1396 }
1397
1398 // Check that the VM was actually created in protected mode as expected.
1399 if cfg.protection_type.isolates_memory() && !vm.check_capability(VmCap::Protected) {
1400 bail!("Failed to create protected VM");
1401 }
1402 let vm_clone = vm.try_clone().context("failed to clone vm")?;
1403
1404 enum KvmIrqChip {
1405 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1406 Split(KvmSplitIrqChip),
1407 Kernel(KvmKernelIrqChip),
1408 }
1409
1410 impl KvmIrqChip {
1411 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
1412 match self {
1413 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1414 KvmIrqChip::Split(i) => i,
1415 KvmIrqChip::Kernel(i) => i,
1416 }
1417 }
1418 }
1419
1420 let ioapic_host_tube;
1421 let mut irq_chip = if cfg.split_irqchip {
1422 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
1423 unimplemented!("KVM split irqchip mode only supported on x86 processors");
1424 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1425 {
1426 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
1427 ioapic_host_tube = Some(host_tube);
1428 KvmIrqChip::Split(
1429 KvmSplitIrqChip::new(
1430 vm_clone,
1431 components.vcpu_count,
1432 ioapic_device_tube,
1433 Some(120),
1434 )
1435 .context("failed to create IRQ chip")?,
1436 )
1437 }
1438 } else {
1439 ioapic_host_tube = None;
1440 KvmIrqChip::Kernel(
1441 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
1442 .context("failed to create IRQ chip")?,
1443 )
1444 };
1445
1446 run_vm::<KvmVcpu, KvmVm>(
1447 cfg,
1448 components,
1449 vm,
1450 irq_chip.as_mut(),
1451 ioapic_host_tube,
1452 #[cfg(feature = "swap")]
1453 swap_controller,
1454 )
1455 }
1456
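/// Opens the Gunyah device, creates the VM and its IRQ chip, and hands off to `run_vm`.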
1457 #[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), feature = "gunyah"))]
1458 fn run_gunyah(
1459 device_path: Option<&Path>,
1460 cfg: Config,
1461 components: VmComponents,
1462 ) -> Result<ExitState> {
1463 use devices::GunyahIrqChip;
1464 use hypervisor::gunyah::{Gunyah, GunyahVcpu, GunyahVm};
1465
1466 let device_path = device_path.unwrap_or(Path::new(GUNYAH_PATH));
1467 let gunyah = Gunyah::new_with_path(device_path)
1468 .with_context(|| format!("failed to open Gunyah device {}", device_path.display()))?;
1469
1470 let guest_mem = create_guest_memory(&cfg, &components, &gunyah)?;
1471
1472 #[cfg(feature = "swap")]
1473 let swap_controller = if let Some(swap_dir) = cfg.swap_dir.as_ref() {
1474 Some(
1475 SwapController::launch(guest_mem.clone(), swap_dir, &cfg.jail_config)
1476 .context("launch vmm-swap monitor process")?,
1477 )
1478 } else {
1479 None
1480 };
1481
1482 let vm = GunyahVm::new(&gunyah, guest_mem, components.hv_cfg).context("failed to create vm")?;
1483
1484 // Check that the VM was actually created in protected mode as expected.
1485 if cfg.protection_type.isolates_memory() && !vm.check_capability(VmCap::Protected) {
1486 bail!("Failed to create protected VM");
1487 }
1488
1489 let vm_clone = vm.try_clone()?;
1490
1491 run_vm::<GunyahVcpu, GunyahVm>(
1492 cfg,
1493 components,
1494 vm,
1495 &mut GunyahIrqChip::new(vm_clone)?,
1496 None,
1497 #[cfg(feature = "swap")]
1498 swap_controller,
1499 )
1500 }
1501
1502 /// Choose a default hypervisor if no `--hypervisor` option was specified.
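/// Probes for `/dev/kvm` first, then the GenieZone and Gunyah device nodes when those backends
/// are compiled in, returning the first whose device node exists.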
1503 fn get_default_hypervisor() -> Option<HypervisorKind> {
1504 let kvm_path = Path::new(KVM_PATH);
1505 if kvm_path.exists() {
1506 return Some(HypervisorKind::Kvm {
1507 device: Some(kvm_path.to_path_buf()),
1508 });
1509 }
1510
1511 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1512 #[cfg(feature = "geniezone")]
1513 {
1514 let gz_path = Path::new(GENIEZONE_PATH);
1515 if gz_path.exists() {
1516 return Some(HypervisorKind::Geniezone {
1517 device: Some(gz_path.to_path_buf()),
1518 });
1519 }
1520 }
1521
1522 #[cfg(all(
1523 unix,
1524 any(target_arch = "arm", target_arch = "aarch64"),
1525 feature = "gunyah"
1526 ))]
1527 {
1528 let gunyah_path = Path::new(GUNYAH_PATH);
1529 if gunyah_path.exists() {
1530 return Some(HypervisorKind::Gunyah {
1531 device: Some(gunyah_path.to_path_buf()),
1532 });
1533 }
1534 }
1535
1536 None
1537 }
1538
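/// Builds `VmComponents` from `cfg`, selects a hypervisor backend, and runs the VM to completion.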
1539 pub fn run_config(cfg: Config) -> Result<ExitState> {
1540 if let Some(async_executor) = cfg.async_executor {
1541 Executor::set_default_executor_kind(async_executor)
1542 .context("Failed to set the default async executor")?;
1543 }
1544
1545 let components = setup_vm_components(&cfg)?;
1546
1547 let hypervisor = cfg
1548 .hypervisor
1549 .clone()
1550 .or_else(get_default_hypervisor)
1551 .context("no enabled hypervisor")?;
1552
1553 debug!("creating hypervisor: {:?}", hypervisor);
1554
1555 match hypervisor {
1556 HypervisorKind::Kvm { device } => run_kvm(device.as_deref(), cfg, components),
1557 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1558 #[cfg(feature = "geniezone")]
1559 HypervisorKind::Geniezone { device } => run_gz(device.as_deref(), cfg, components),
1560 #[cfg(all(
1561 unix,
1562 any(target_arch = "arm", target_arch = "aarch64"),
1563 feature = "gunyah"
1564 ))]
1565 HypervisorKind::Gunyah { device } => run_gunyah(device.as_deref(), cfg, components),
1566 }
1567 }
1568
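/// Sets up control sockets, device tubes, the system allocator, and all devices, then runs the
/// guest vCPUs and the main control loop until the VM exits. Generic over the hypervisor's vCPU
/// and VM types.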
1569 fn run_vm<Vcpu, V>(
1570 cfg: Config,
1571 #[allow(unused_mut)] mut components: VmComponents,
1572 mut vm: V,
1573 irq_chip: &mut dyn IrqChipArch,
1574 ioapic_host_tube: Option<Tube>,
1575 #[cfg(feature = "swap")] swap_controller: Option<SwapController>,
1576 ) -> Result<ExitState>
1577 where
1578 Vcpu: VcpuArch + 'static,
1579 V: VmArch + 'static,
1580 {
1581 if cfg.jail_config.is_some() {
1582 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1583 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1584 // access to those files will not be possible.
1585 info!("crosvm entering multiprocess mode");
1586 }
1587
1588 #[cfg(feature = "gpu")]
1589 let (gpu_control_host_tube, gpu_control_device_tube) =
1590 Tube::pair().context("failed to create gpu tube")?;
1591
1592 #[cfg(feature = "usb")]
1593 let (usb_control_tube, usb_provider) =
1594 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
1595
1596 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1597 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1598 // quickly.
1599 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
1600
1601 let control_server_socket = match &cfg.socket_path {
1602 Some(path) => Some(UnlinkUnixSeqpacketListener(
1603 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
1604 )),
1605 None => None,
1606 };
1607
1608 let mut control_tubes = Vec::new();
1609 let mut irq_control_tubes = Vec::new();
1610
1611 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
1612 if let Some(port) = cfg.gdb {
1613 // GDB needs a control socket to interrupt vcpus.
1614 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
1615 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
1616 components.gdb = Some((port, gdb_control_tube));
1617 }
1618
1619 #[cfg(feature = "balloon")]
1620 let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
1621 if let Some(ref path) = cfg.balloon_control {
1622 (
1623 None,
1624 Some(Tube::new_from_unix_seqpacket(
1625 UnixSeqpacket::connect(path).with_context(|| {
1626 format!(
1627 "failed to connect to balloon control socket {}",
1628 path.display(),
1629 )
1630 })?,
1631 )),
1632 )
1633 } else {
1634 // Balloon gets a special socket so balloon requests can be forwarded
1635 // from the main process.
1636 let (host, device) = Tube::pair().context("failed to create tube")?;
1637 // Set recv timeout to avoid deadlock on sending BalloonControlCommand
1638 // before the guest is ready.
1639 host.set_recv_timeout(Some(Duration::from_millis(100)))
1640 .context("failed to set timeout")?;
1641 (Some(host), Some(device))
1642 }
1643 } else {
1644 (None, None)
1645 };
1646
1647 #[cfg(feature = "balloon")]
1648 let (balloon_wss_host_tube, balloon_wss_device_tube) = if cfg.balloon_wss_reporting {
1649 let (host, device) = Tube::pair().context("failed to create tube")?;
1650 host.set_recv_timeout(Some(Duration::from_millis(100)))
1651 .context("failed to set timeout")?;
1652 (Some(host), Some(device))
1653 } else {
1654 (None, None)
1655 };
1656
1657 // Create one control socket per disk.
1658 let mut disk_device_tubes = Vec::new();
1659 let mut disk_host_tubes = Vec::new();
1660 let disk_count = cfg.disks.len();
1661 for _ in 0..disk_count {
1662 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
1663 disk_host_tubes.push(disk_host_tub);
1664 disk_device_tubes.push(disk_device_tube);
1665 }
1666
1667 let mut pmem_device_tubes = Vec::new();
1668 let pmem_count = cfg.pmem_devices.len();
1669 for _ in 0..pmem_count {
1670 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
1671 pmem_device_tubes.push(pmem_device_tube);
1672 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
1673 }
1674
1675 if let Some(ioapic_host_tube) = ioapic_host_tube {
1676 irq_control_tubes.push(ioapic_host_tube);
1677 }
1678
1679 let battery = if cfg.battery_config.is_some() {
1680 #[cfg_attr(
1681 not(feature = "power-monitor-powerd"),
1682 allow(clippy::manual_map, clippy::needless_match, unused_mut)
1683 )]
1684 let jail = if let Some(jail_config) = &cfg.jail_config {
1685 let mut config = SandboxConfig::new(jail_config, "battery");
1686 #[cfg(feature = "power-monitor-powerd")]
1687 {
1688 config.bind_mounts = true;
1689 }
1690 let mut jail =
1691 create_sandbox_minijail(&jail_config.pivot_root, MAX_OPEN_FILES_DEFAULT, &config)?;
1692
1693             // Set up a bind mount to the system D-Bus socket if the powerd monitor is used.
1694 #[cfg(feature = "power-monitor-powerd")]
1695 {
1696 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
1697 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
1698 }
1699 Some(jail)
1700 } else {
1701 None
1702 };
1703 (cfg.battery_config.as_ref().map(|c| c.type_), jail)
1704 } else {
1705 (cfg.battery_config.as_ref().map(|c| c.type_), None)
1706 };
1707
1708 let fs_count = cfg
1709 .shared_dirs
1710 .iter()
1711 .filter(|sd| sd.kind == SharedDirKind::FS)
1712 .count();
1713 let mut fs_device_tubes = Vec::with_capacity(fs_count);
1714 for _ in 0..fs_count {
1715 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
1716 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
1717 fs_device_tubes.push(fs_device_tube);
1718 }
1719
1720 let mut vvu_proxy_device_tubes = Vec::new();
1721 for _ in 0..cfg.vvu_proxy.len() {
1722 let (vvu_proxy_host_tube, vvu_proxy_device_tube) =
1723 Tube::pair().context("failed to create VVU proxy tube")?;
1724 control_tubes.push(TaggedControlTube::VmMemory {
1725 tube: vvu_proxy_host_tube,
1726 expose_with_viommu: false,
1727 });
1728 vvu_proxy_device_tubes.push(vvu_proxy_device_tube);
1729 }
1730
1731 let (vm_evt_wrtube, vm_evt_rdtube) =
1732 Tube::directional_pair().context("failed to create vm event tube")?;
1733
1734 let pstore_size = components.pstore.as_ref().map(|pstore| pstore.size as u64);
1735 let mut sys_allocator = SystemAllocator::new(
1736 Arch::get_system_allocator_config(&vm),
1737 pstore_size,
1738 &cfg.mmio_address_ranges,
1739 )
1740 .context("failed to create system allocator")?;
1741
1742 let ramoops_region = match &components.pstore {
1743 Some(pstore) => Some(
1744 arch::pstore::create_memory_region(
1745 &mut vm,
1746 sys_allocator.reserved_region().unwrap(),
1747 pstore,
1748 )
1749 .context("failed to allocate pstore region")?,
1750 ),
1751 None => None,
1752 };
1753
1754 create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
1755
1756 #[cfg(feature = "gpu")]
1757 // Hold on to the render server jail so it keeps running until we exit run_vm()
1758 let (_render_server_jail, render_server_fd) =
1759 if let Some(parameters) = &cfg.gpu_render_server_parameters {
1760 let (jail, fd) = start_gpu_render_server(&cfg, parameters)?;
1761 (Some(ScopedMinijail(jail)), Some(fd))
1762 } else {
1763 (None, None)
1764 };
1765
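    // The initial balloon size is the total guest memory minus the requested initial
    // memory (`cfg.init_memory`, given in MiB). If no initial memory was specified, the
    // subtrahend equals the full memory size and the balloon starts out at zero.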
1766 #[cfg(feature = "balloon")]
1767 let init_balloon_size = components
1768 .memory_size
1769 .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
1770 m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
1771 }))
1772 .context("failed to calculate init balloon size")?;
1773
1774 #[cfg(feature = "direct")]
1775 let mut irqs = Vec::new();
1776
1777 #[cfg(feature = "direct")]
1778 for irq in &cfg.direct_level_irq {
1779 if !sys_allocator.reserve_irq(*irq) {
1780 warn!("irq {} already reserved.", irq);
1781 }
1782 use devices::CrosvmDeviceId;
1783 let irq_event_source = IrqEventSource {
1784 device_id: CrosvmDeviceId::DirectIo.into(),
1785 queue_id: 0,
1786             device_name: format!("direct level irq {}", irq),
1787 };
1788 let irq_evt = devices::IrqLevelEvent::new().context("failed to create event")?;
1789 irq_chip
1790 .register_level_irq_event(*irq, &irq_evt, irq_event_source)
1791 .unwrap();
1792 let direct_irq = devices::DirectIrq::new_level(&irq_evt)
1793 .context("failed to enable interrupt forwarding")?;
1794 direct_irq
1795 .irq_enable(*irq)
1796 .context("failed to enable interrupt forwarding")?;
1797 irqs.push(direct_irq);
1798 }
1799
1800 #[cfg(feature = "direct")]
1801 for irq in &cfg.direct_edge_irq {
1802 if !sys_allocator.reserve_irq(*irq) {
1803 warn!("irq {} already reserved.", irq);
1804 }
1805 use devices::CrosvmDeviceId;
1806 let irq_event_source = IrqEventSource {
1807 device_id: CrosvmDeviceId::DirectIo.into(),
1808 queue_id: 0,
1809             device_name: format!("direct edge irq {}", irq),
1810 };
1811 let irq_evt = devices::IrqEdgeEvent::new().context("failed to create event")?;
1812 irq_chip
1813 .register_edge_irq_event(*irq, &irq_evt, irq_event_source)
1814 .unwrap();
1815 let direct_irq = devices::DirectIrq::new_edge(&irq_evt)
1816 .context("failed to enable interrupt forwarding")?;
1817 direct_irq
1818 .irq_enable(*irq)
1819 .context("failed to enable interrupt forwarding")?;
1820 irqs.push(direct_irq);
1821 }
1822
1823 // Reserve direct mmio range in advance.
1824 #[cfg(feature = "direct")]
1825 if let Some(mmio) = &cfg.direct_mmio {
1826 for range in mmio.ranges.iter() {
1827 AddressRange::from_start_and_size(range.base, range.len)
1828 .ok_or(ResourceError::OutOfSpace)
1829 .and_then(|range| sys_allocator.reserve_mmio(range))
1830 .with_context(|| {
1831 format!(
1832                         "failed to reserve direct mmio: {:x}-{:x}",
1833 range.base,
1834 range.base + range.len - 1,
1835 )
1836 })?;
1837 }
1838 };
1839
1840 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> =
1841 BTreeMap::new();
1842 let mut iova_max_addr: Option<u64> = None;
1843
1844 let (reg_evt_wrtube, reg_evt_rdtube) =
1845 Tube::directional_pair().context("failed to create registered event tube")?;
1846
1847 let mut devices = create_devices(
1848 &cfg,
1849 &mut vm,
1850 &mut sys_allocator,
1851 &vm_evt_wrtube,
1852 &mut iommu_attached_endpoints,
1853 &mut irq_control_tubes,
1854 &mut control_tubes,
1855 #[cfg(feature = "balloon")]
1856 balloon_device_tube,
1857 #[cfg(feature = "balloon")]
1858 balloon_wss_device_tube,
1859 #[cfg(feature = "balloon")]
1860 init_balloon_size,
1861 &mut disk_device_tubes,
1862 &mut pmem_device_tubes,
1863 &mut fs_device_tubes,
1864 #[cfg(feature = "usb")]
1865 usb_provider,
1866 #[cfg(feature = "gpu")]
1867 gpu_control_device_tube,
1868 #[cfg(feature = "gpu")]
1869 render_server_fd,
1870 &mut vvu_proxy_device_tubes,
1871 components.memory_size,
1872 &mut iova_max_addr,
1873         &reg_evt_wrtube,
1874 )?;
1875
1876 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
1877 let hp_endpoints_ranges: Vec<RangeInclusive<u32>> = Vec::new();
1878 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1879 let mut hp_endpoints_ranges: Vec<RangeInclusive<u32>> = Vec::new();
1880 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1881 let mut hotplug_buses: Vec<(u8, Arc<Mutex<dyn HotPlugBus>>)> = Vec::new();
1882 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1883 let mut gpe_notify_devs: Vec<(u32, Arc<Mutex<dyn GpeNotify>>)> = Vec::new();
1884 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1885 let mut pme_notify_devs: Vec<(u8, Arc<Mutex<dyn PmeNotify>>)> = Vec::new();
1886 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1887 {
1888 #[cfg(feature = "direct")]
1889 let rp_host = cfg.pcie_rp.clone();
1890 #[cfg(not(feature = "direct"))]
1891 let rp_host: Vec<HostPcieRootPortParameters> = Vec::new();
1892
1893         // Create PCIe root ports.
1894 create_pcie_root_port(
1895 rp_host,
1896 &mut sys_allocator,
1897 &mut irq_control_tubes,
1898 &mut control_tubes,
1899 &mut devices,
1900 &mut hotplug_buses,
1901 &mut hp_endpoints_ranges,
1902 &mut gpe_notify_devs,
1903 &mut pme_notify_devs,
1904 )?;
1905 }
1906
1907 arch::assign_pci_addresses(&mut devices, &mut sys_allocator)?;
1908
1909 let (translate_response_senders, request_rx) = setup_virtio_access_platform(
1910 &mut sys_allocator,
1911 &mut iommu_attached_endpoints,
1912 &mut devices,
1913 )?;
1914
1915 let iommu_host_tube = if !iommu_attached_endpoints.is_empty()
1916 || (cfg.vfio_isolate_hotplug && !hp_endpoints_ranges.is_empty())
1917 {
1918 let (iommu_host_tube, iommu_device_tube) = Tube::pair().context("failed to create tube")?;
1919 let iommu_dev = create_iommu_device(
1920 cfg.protection_type,
1921 &cfg.jail_config,
1922 iova_max_addr.unwrap_or(u64::MAX),
1923 iommu_attached_endpoints,
1924 hp_endpoints_ranges,
1925 translate_response_senders,
1926 request_rx,
1927 iommu_device_tube,
1928 )?;
1929
1930 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1931 irq_control_tubes.push(msi_host_tube);
1932 let (ioevent_host_tube, ioevent_device_tube) =
1933 Tube::pair().context("failed to create ioevent tube")?;
1934 control_tubes.push(TaggedControlTube::VmMemory {
1935 tube: ioevent_host_tube,
1936 expose_with_viommu: false,
1937 });
1938 let mut dev = VirtioPciDevice::new(
1939 vm.get_memory().clone(),
1940 iommu_dev.dev,
1941 msi_device_tube,
1942 cfg.disable_virtio_intx,
1943 None,
1944 ioevent_device_tube,
1945 )
1946 .context("failed to create virtio pci dev")?;
1947         // Allocate the viommu device's PCI address early.
1948 dev.allocate_address(&mut sys_allocator)
1949 .context("failed to allocate resources early for virtio pci dev")?;
1950 let dev = Box::new(dev);
1951 devices.push((dev, iommu_dev.jail));
1952 Some(iommu_host_tube)
1953 } else {
1954 None
1955 };
1956
1957 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1958 for device in devices
1959 .iter_mut()
1960 .filter_map(|(dev, _)| dev.as_pci_device_mut())
1961 {
1962 let sdts = device
1963 .generate_acpi(components.acpi_sdts)
1964 .or_else(|| {
1965 error!("ACPI table generation error");
1966 None
1967 })
1968 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
1969 components.acpi_sdts = sdts;
1970 }
1971
1972     // KVM_CREATE_VCPU uses the APIC ID on x86 and the CPU ID on other architectures.
1973 let mut vcpu_ids = Vec::new();
1974
1975 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
1976 let mut linux = Arch::build_vm::<V, Vcpu>(
1977 components,
1978 &vm_evt_wrtube,
1979 &mut sys_allocator,
1980 &cfg.serial_parameters,
1981 simple_jail(&cfg.jail_config, "serial_device")?,
1982 battery,
1983 vm,
1984 ramoops_region,
1985 devices,
1986 irq_chip,
1987 &mut vcpu_ids,
1988 cfg.dump_device_tree_blob.clone(),
1989 simple_jail(&cfg.jail_config, "serial_device")?,
1990 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1991 simple_jail(&cfg.jail_config, "block_device")?,
1992 #[cfg(feature = "swap")]
1993 swap_controller.as_ref(),
1994 )
1995 .context("the architecture failed to build the vm")?;
1996
1997 if let Some(tube) = linux.vm_request_tube.take() {
1998 control_tubes.push(TaggedControlTube::Vm(tube));
1999 }
2000
2001 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2002 let (hp_control_tube, hp_worker_tube) = mpsc::channel();
2003
2004 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2005 let hp_thread = {
2006 for (bus_num, hp_bus) in hotplug_buses {
2007 linux.hotplug_bus.insert(bus_num, hp_bus);
2008 }
2009
2010 if let Some(pm) = &linux.pm {
2011 while let Some((gpe, notify_dev)) = gpe_notify_devs.pop() {
2012 pm.lock().register_gpe_notify_dev(gpe, notify_dev);
2013 }
2014 while let Some((bus, notify_dev)) = pme_notify_devs.pop() {
2015 pm.lock().register_pme_notify_dev(bus, notify_dev);
2016 }
2017 }
2018
2019 let pci_root = linux.root_config.clone();
2020 std::thread::Builder::new()
2021 .name("pci_root".to_string())
2022 .spawn(move || start_pci_root_worker(pci_root, hp_worker_tube))?
2023 };
2024
2025 #[cfg(feature = "direct")]
2026 if let Some(pmio) = &cfg.direct_pmio {
2027 let direct_io = Arc::new(
2028 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
2029 );
2030 for range in pmio.ranges.iter() {
2031 linux
2032 .io_bus
2033 .insert_sync(direct_io.clone(), range.base, range.len)
2034             .context("failed to insert direct pmio device into io_bus")?;
2035 }
2036 };
2037
2038 #[cfg(feature = "direct")]
2039 if let Some(mmio) = &cfg.direct_mmio {
2040 let direct_mmio = Arc::new(
2041 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
2042 .context("failed to open direct mmio device")?,
2043 );
2044
2045 for range in mmio.ranges.iter() {
2046 linux
2047 .mmio_bus
2048 .insert_sync(direct_mmio.clone(), range.base, range.len)
2049             .context("failed to insert direct mmio device into mmio_bus")?;
2050 }
2051 };
2052
2053 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
2054
2055 run_control(
2056 linux,
2057 sys_allocator,
2058 cfg,
2059 control_server_socket,
2060 irq_control_tubes,
2061 control_tubes,
2062 #[cfg(feature = "balloon")]
2063 balloon_host_tube,
2064 #[cfg(feature = "balloon")]
2065 balloon_wss_host_tube,
2066 &disk_host_tubes,
2067 #[cfg(feature = "gpu")]
2068 gpu_control_host_tube,
2069 #[cfg(feature = "usb")]
2070 usb_control_tube,
2071 vm_evt_rdtube,
2072 vm_evt_wrtube,
2073 sigchld_fd,
2074 gralloc,
2075 vcpu_ids,
2076 iommu_host_tube,
2077 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2078 hp_control_tube,
2079 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2080 hp_thread,
2081 #[cfg(feature = "swap")]
2082 swap_controller,
2083 reg_evt_rdtube,
2084 )
2085 }
2086
2087 // Hotplug commands can deadlock when they need to acquire the lock for the PCI
2088 // root in the vm control thread. The deadlock occurs when the vm control thread
2089 // (thread A) handles a hotplug command and tries to take the PCI root lock while
2090 // that lock is already held by a device in thread B, which in turn is sending a
2091 // vm control request to be handled by thread A and is waiting for the response.
2092 // Since thread A is blocked on acquiring the lock, neither thread can make
2093 // progress. To avoid this deadlock, we add this worker thread and push all work
2094 // that locks the PCI root to it.
2095 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2096 fn start_pci_root_worker(
2097 pci_root: Arc<Mutex<PciRoot>>,
2098 hp_device_tube: mpsc::Receiver<PciRootCommand>,
2099 ) {
2100 loop {
2101 match hp_device_tube.recv() {
2102 Ok(cmd) => match cmd {
2103 PciRootCommand::Add(addr, device) => {
2104 pci_root.lock().add_device(addr, device);
2105 }
2106 PciRootCommand::AddBridge(pci_bus) => pci_root.lock().add_bridge(pci_bus),
2107 PciRootCommand::Remove(addr) => {
2108 pci_root.lock().remove_device(addr);
2109 }
2110 PciRootCommand::Kill => break,
2111 },
2112 Err(e) => {
2113                 error!("pci root worker channel closed: {}", e);
2114 break;
2115 }
2116 }
2117 }
2118 }
2119
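/// Returns the hotplug bus, if any, that the given host PCI address belongs to.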
2120 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2121 fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
2122 linux: &RunnableLinuxVm<V, Vcpu>,
2123 host_addr: PciAddress,
2124 ) -> Result<Arc<Mutex<dyn HotPlugBus>>> {
2125 for (_, hp_bus) in linux.hotplug_bus.iter() {
2126 if hp_bus.lock().is_match(host_addr).is_some() {
2127 return Ok(hp_bus.clone());
2128 }
2129 }
2130 Err(anyhow!("Failed to find a suitable hotplug bus"))
2131 }
2132
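/// Hotplugs a device into the guest: either an emulated PCIe upstream/downstream port
/// backed by a host port, or a VFIO endpoint. The new device is registered on the PCI
/// bus, attached to the virtio-iommu when one is present, and announced to the hotplug
/// bus (optionally triggering a hotplug interrupt).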
2133 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2134 fn add_hotplug_device<V: VmArch, Vcpu: VcpuArch>(
2135 linux: &mut RunnableLinuxVm<V, Vcpu>,
2136 sys_allocator: &mut SystemAllocator,
2137 cfg: &Config,
2138 irq_control_tubes: &mut Vec<Tube>,
2139 control_tubes: &mut Vec<TaggedControlTube>,
2140 hp_control_tube: &mpsc::Sender<PciRootCommand>,
2141 iommu_host_tube: &Option<Tube>,
2142 device: &HotPlugDeviceInfo,
2143 #[cfg(feature = "swap")] swap_controller: Option<&SwapController>,
2144 ) -> Result<()> {
2145 let host_addr = PciAddress::from_path(&device.path)
2146 .context("failed to parse hotplug device's PCI address")?;
2147 let hp_bus = get_hp_bus(linux, host_addr)?;
2148
2149 let (host_key, pci_address) = match device.device_type {
2150 HotPlugDeviceType::UpstreamPort | HotPlugDeviceType::DownstreamPort => {
2151 let (vm_host_tube, vm_device_tube) = Tube::pair().context("failed to create tube")?;
2152 control_tubes.push(TaggedControlTube::Vm(vm_host_tube));
2153 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
2154 irq_control_tubes.push(msi_host_tube);
2155 let pcie_host = PcieHostPort::new(device.path.as_path(), vm_device_tube)?;
2156 let (host_key, pci_bridge) = match device.device_type {
2157 HotPlugDeviceType::UpstreamPort => {
2158 let host_key = HostHotPlugKey::UpstreamPort { host_addr };
2159 let pcie_upstream_port = Arc::new(Mutex::new(PcieUpstreamPort::new_from_host(
2160 pcie_host, true,
2161 )?));
2162 let pci_bridge =
2163 Box::new(PciBridge::new(pcie_upstream_port.clone(), msi_device_tube));
2164 linux
2165 .hotplug_bus
2166 .insert(pci_bridge.get_secondary_num(), pcie_upstream_port);
2167 (host_key, pci_bridge)
2168 }
2169 HotPlugDeviceType::DownstreamPort => {
2170 let host_key = HostHotPlugKey::DownstreamPort { host_addr };
2171 let pcie_downstream_port = Arc::new(Mutex::new(
2172 PcieDownstreamPort::new_from_host(pcie_host, true)?,
2173 ));
2174 let pci_bridge = Box::new(PciBridge::new(
2175 pcie_downstream_port.clone(),
2176 msi_device_tube,
2177 ));
2178 linux
2179 .hotplug_bus
2180 .insert(pci_bridge.get_secondary_num(), pcie_downstream_port);
2181 (host_key, pci_bridge)
2182 }
2183 _ => {
2184                     bail!("unreachable: device type was matched by the outer arm")
2185 }
2186 };
2187 let pci_address = Arch::register_pci_device(
2188 linux,
2189 pci_bridge,
2190 None,
2191 sys_allocator,
2192 hp_control_tube,
2193 #[cfg(feature = "swap")]
2194 swap_controller,
2195 )?;
2196
2197 (host_key, pci_address)
2198 }
2199 HotPlugDeviceType::EndPoint => {
2200 let host_key = HostHotPlugKey::Vfio { host_addr };
2201 let (vfio_device, jail, viommu_mapper) = create_vfio_device(
2202 &cfg.jail_config,
2203 &linux.vm,
2204 sys_allocator,
2205 irq_control_tubes,
2206 control_tubes,
2207 &device.path,
2208 true,
2209 None,
2210 None,
2211 None,
2212 if iommu_host_tube.is_some() {
2213 IommuDevType::VirtioIommu
2214 } else {
2215 IommuDevType::NoIommu
2216 },
2217 #[cfg(feature = "direct")]
2218 false,
2219 )?;
2220 let vfio_pci_device = match vfio_device {
2221 VfioDeviceVariant::Pci(pci) => Box::new(pci),
2222 VfioDeviceVariant::Platform(_) => bail!("vfio platform hotplug not supported"),
2223 };
2224 let pci_address = Arch::register_pci_device(
2225 linux,
2226 vfio_pci_device,
2227 jail,
2228 sys_allocator,
2229 hp_control_tube,
2230 #[cfg(feature = "swap")]
2231 swap_controller,
2232 )?;
2233 if let Some(iommu_host_tube) = iommu_host_tube {
2234 let endpoint_addr = pci_address.to_u32();
2235 let vfio_wrapper = viommu_mapper.context("expected mapper")?;
2236 let descriptor = vfio_wrapper.clone_as_raw_descriptor()?;
2237 let request =
2238 VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceAdd {
2239 endpoint_addr,
2240 wrapper_id: vfio_wrapper.id(),
2241 container: {
2242                             // Safe because the raw descriptor was freshly cloned above and `descriptor` is its sole owner.
2243 unsafe { File::from_raw_descriptor(descriptor) }
2244 },
2245 });
2246 match virtio_iommu_request(iommu_host_tube, &request)
2247 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
2248 {
2249 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
2250 resp => bail!("Unexpected message response: {:?}", resp),
2251 }
2252 }
2253
2254 (host_key, pci_address)
2255 }
2256 };
2257 hp_bus.lock().add_hotplug_device(host_key, pci_address);
2258 if device.hp_interrupt {
2259 hp_bus.lock().hot_plug(pci_address);
2260 }
2261 Ok(())
2262 }
2263
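/// Hot-unplugs the bridge identified by `host_key`, releasing its PCI resources and
/// recording `child_bus` in `buses_to_remove`. If the containing hotplug bus becomes
/// empty, its own bridge is removed recursively as well.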
2264 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2265 fn remove_hotplug_bridge<V: VmArch, Vcpu: VcpuArch>(
2266 linux: &RunnableLinuxVm<V, Vcpu>,
2267 sys_allocator: &mut SystemAllocator,
2268 buses_to_remove: &mut Vec<u8>,
2269 host_key: HostHotPlugKey,
2270 child_bus: u8,
2271 ) -> Result<()> {
2272 for (bus_num, hp_bus) in linux.hotplug_bus.iter() {
2273 let mut hp_bus_lock = hp_bus.lock();
2274 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
2275 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
2276 hp_bus_lock.hot_unplug(pci_addr);
2277 buses_to_remove.push(child_bus);
2278 if hp_bus_lock.is_empty() {
2279 if let Some(hotplug_key) = hp_bus_lock.get_hotplug_key() {
2280 remove_hotplug_bridge(
2281 linux,
2282 sys_allocator,
2283 buses_to_remove,
2284 hotplug_key,
2285 *bus_num,
2286 )?;
2287 }
2288 }
2289 return Ok(());
2290 }
2291 }
2292
2293 Err(anyhow!(
2294         "Cannot find device {:?} on hotplug buses",
2295 host_key
2296 ))
2297 }
2298
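/// Removes a hotplugged device (a PCIe port or a VFIO endpoint) from the guest:
/// detaches it from the virtio-iommu if one is in use, hot-unplugs it (unless all
/// sibling downstream ports are already empty, in which case the root port handles
/// the removal), releases its PCI resources, and tears down any emulated bridges
/// left without child devices.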
2299 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2300 fn remove_hotplug_device<V: VmArch, Vcpu: VcpuArch>(
2301 linux: &mut RunnableLinuxVm<V, Vcpu>,
2302 sys_allocator: &mut SystemAllocator,
2303 iommu_host_tube: &Option<Tube>,
2304 device: &HotPlugDeviceInfo,
2305 ) -> Result<()> {
2306 let host_addr = PciAddress::from_path(&device.path)?;
2307 let host_key = match device.device_type {
2308 HotPlugDeviceType::UpstreamPort => HostHotPlugKey::UpstreamPort { host_addr },
2309 HotPlugDeviceType::DownstreamPort => HostHotPlugKey::DownstreamPort { host_addr },
2310 HotPlugDeviceType::EndPoint => HostHotPlugKey::Vfio { host_addr },
2311 };
2312
2313 let hp_bus = linux
2314 .hotplug_bus
2315 .iter()
2316 .find(|(_, hp_bus)| {
2317 let hp_bus = hp_bus.lock();
2318 hp_bus.get_hotplug_device(host_key).is_some()
2319 })
2320 .map(|(bus_num, hp_bus)| (*bus_num, hp_bus.clone()));
2321
2322 if let Some((bus_num, hp_bus)) = hp_bus {
2323 let mut buses_to_remove = Vec::new();
2324 let mut removed_key = None;
2325 let mut hp_bus_lock = hp_bus.lock();
2326 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
2327 if let Some(iommu_host_tube) = iommu_host_tube {
2328 let request =
2329 VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceDel {
2330 endpoint_addr: pci_addr.to_u32(),
2331 });
2332 match virtio_iommu_request(iommu_host_tube, &request)
2333 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
2334 {
2335 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
2336 resp => bail!("Unexpected message response: {:?}", resp),
2337 }
2338 }
2339             let mut empty_sibling = true;
2340 if let Some(HostHotPlugKey::DownstreamPort { host_addr }) =
2341 hp_bus_lock.get_hotplug_key()
2342 {
2343 let addr_alias = host_addr;
2344                 for (sibling_bus_num, hp_bus) in linux.hotplug_bus.iter() {
2345                     if *sibling_bus_num != bus_num {
2346 let hp_bus_lock = hp_bus.lock();
2347 let hotplug_key = hp_bus_lock.get_hotplug_key();
2348 if let Some(HostHotPlugKey::DownstreamPort { host_addr }) = hotplug_key {
2349 if addr_alias.bus == host_addr.bus && !hp_bus_lock.is_empty() {
2350                                 empty_sibling = false;
2351 break;
2352 }
2353 }
2354 }
2355 }
2356 }
2357
2358             // If all sibling downstream ports are empty, do not send a hot unplug event for
2359             // this downstream port. The root port will send one plug-out interrupt and remove
2360             // all of the remaining devices.
2361             if !empty_sibling {
2362 hp_bus_lock.hot_unplug(pci_addr);
2363 }
2364
2365 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
2366             if empty_sibling || hp_bus_lock.is_empty() {
2367 if let Some(hotplug_key) = hp_bus_lock.get_hotplug_key() {
2368 removed_key = Some(hotplug_key);
2369 remove_hotplug_bridge(
2370 linux,
2371 sys_allocator,
2372 &mut buses_to_remove,
2373 hotplug_key,
2374 bus_num,
2375 )?;
2376 }
2377 }
2378 }
2379
2380         // Some types of TBT devices have a few empty downstream ports. The emulated bridges
2381         // of these ports won't be removed since no vfio device is connected to them. So we
2382         // explicitly check all sibling bridges of the removed bridge here, and remove any
2383         // bridge that has no child device connected.
2384 if let Some(HostHotPlugKey::DownstreamPort { host_addr }) = removed_key {
2385 let addr_alias = host_addr;
2386             for (sibling_bus_num, hp_bus) in linux.hotplug_bus.iter() {
2387                 if *sibling_bus_num != bus_num {
2388 let hp_bus_lock = hp_bus.lock();
2389 let hotplug_key = hp_bus_lock.get_hotplug_key();
2390 if let Some(HostHotPlugKey::DownstreamPort { host_addr }) = hotplug_key {
2391 if addr_alias.bus == host_addr.bus && hp_bus_lock.is_empty() {
2392 remove_hotplug_bridge(
2393 linux,
2394 sys_allocator,
2395 &mut buses_to_remove,
2396 hotplug_key.unwrap(),
2397                                 *sibling_bus_num,
2398 )?;
2399 }
2400 }
2401 }
2402 }
2403 }
2404 for bus in buses_to_remove.iter() {
2405 linux.hotplug_bus.remove(bus);
2406 }
2407 return Ok(());
2408 }
2409
2410 Err(anyhow!(
2411         "Cannot find device {:?} on hotplug buses",
2412 host_key
2413 ))
2414 }
2415
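/// Asks the guest to suspend by emulating a sleep button press, waits up to 15 seconds
/// for the guest to report that it is suspended, then signals `suspend_evt` and sends
/// `response` back over `tube`.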
2416 pub fn trigger_vm_suspend_and_wait_for_entry(
2417 guest_suspended_cvar: Arc<(Mutex<bool>, Condvar)>,
2418 tube: &SendTube,
2419 response: vm_control::VmResponse,
2420 suspend_evt: Event,
2421 pm: Option<Arc<Mutex<dyn PmResource + Send>>>,
2422 ) {
2423 let (lock, cvar) = &*guest_suspended_cvar;
2424 let mut guest_suspended = lock.lock();
2425
2426 *guest_suspended = false;
2427
2428     // During suspend, also emulate a sleep button press, which lets the guest suspend itself
2429     // (e.g. if it is running acpid and reacts to sleep button events).
2430 if let Some(pm) = pm {
2431 pm.lock().slpbtn_evt();
2432 } else {
2433 error!("generating sleepbtn during suspend not supported");
2434 }
2435
2436     // Wait for a notification about guest suspension; if it is not received within 15 seconds,
2437     // proceed anyway.
2438 let result = cvar.wait_timeout(guest_suspended, std::time::Duration::from_secs(15));
2439 guest_suspended = result.0;
2440
2441 if result.1.timed_out() {
2442 warn!("Guest suspension timeout - proceeding anyway");
2443 } else if *guest_suspended {
2444 info!("Guest suspended");
2445 }
2446
2447 if let Err(e) = suspend_evt.signal() {
2448 error!("failed to trigger suspend event: {}", e);
2449 }
2450     // Now we are ready to send the response over the tube and communicate that VM suspend has finished.
2451 if let Err(e) = tube.send(&response) {
2452 error!("failed to send VmResponse: {}", e);
2453 }
2454 }
2455
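/// Handles a `VmRequest::HotPlugCommand` by adding or removing the given device and
/// converting the result into a `VmResponse`. On failure, any tubes collected for the
/// new device are dropped so they are not registered with the wait context.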
2456 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2457 fn handle_hotplug_command<V: VmArch, Vcpu: VcpuArch>(
2458 linux: &mut RunnableLinuxVm<V, Vcpu>,
2459 sys_allocator: &mut SystemAllocator,
2460 cfg: &Config,
2461 add_irq_control_tubes: &mut Vec<Tube>,
2462 add_tubes: &mut Vec<TaggedControlTube>,
2463 hp_control_tube: &mpsc::Sender<PciRootCommand>,
2464 iommu_host_tube: &Option<Tube>,
2465 device: &HotPlugDeviceInfo,
2466 add: bool,
2467 #[cfg(feature = "swap")] swap_controller: Option<&SwapController>,
2468 ) -> VmResponse {
2469 let iommu_host_tube = if cfg.vfio_isolate_hotplug {
2470 iommu_host_tube
2471 } else {
2472 &None
2473 };
2474
2475 let ret = if add {
2476 add_hotplug_device(
2477 linux,
2478 sys_allocator,
2479 cfg,
2480 add_irq_control_tubes,
2481 add_tubes,
2482 hp_control_tube,
2483 iommu_host_tube,
2484 device,
2485 #[cfg(feature = "swap")]
2486 swap_controller,
2487 )
2488 } else {
2489 remove_hotplug_device(linux, sys_allocator, iommu_host_tube, device)
2490 };
2491
2492 match ret {
2493 Ok(()) => VmResponse::Ok,
2494 Err(e) => {
2495             error!("handle_hotplug_command failure: {}", e);
2496 add_tubes.clear();
2497 VmResponse::Err(base::Error::new(libc::EINVAL))
2498 }
2499 }
2500 }
2501
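/// The main VM control loop: spawns the vCPU threads, the IRQ handler thread, and the
/// device worker thread, then services control sockets and VM events until the VM shuts
/// down, finally joining the helper threads and returning the resulting `ExitState`.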
2502 fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
2503 mut linux: RunnableLinuxVm<V, Vcpu>,
2504 sys_allocator: SystemAllocator,
2505 cfg: Config,
2506 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
2507 irq_control_tubes: Vec<Tube>,
2508 mut control_tubes: Vec<TaggedControlTube>,
2509 #[cfg(feature = "balloon")] balloon_host_tube: Option<Tube>,
2510 #[cfg(feature = "balloon")] balloon_wss_host_tube: Option<Tube>,
2511 disk_host_tubes: &[Tube],
2512 #[cfg(feature = "gpu")] gpu_control_tube: Tube,
2513 #[cfg(feature = "usb")] usb_control_tube: Tube,
2514 vm_evt_rdtube: RecvTube,
2515 vm_evt_wrtube: SendTube,
2516 sigchld_fd: SignalFd,
2517 mut gralloc: RutabagaGralloc,
2518 vcpu_ids: Vec<usize>,
2519 iommu_host_tube: Option<Tube>,
2520 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] hp_control_tube: mpsc::Sender<
2521 PciRootCommand,
2522 >,
2523 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] hp_thread: std::thread::JoinHandle<()>,
2524 #[cfg(feature = "swap")] swap_controller: Option<SwapController>,
2525 reg_evt_rdtube: RecvTube,
2526 ) -> Result<ExitState> {
2527 #[derive(EventToken)]
2528 enum Token {
2529 VmEvent,
2530 Suspend,
2531 ChildSignal,
2532 VmControlServer,
2533 VmControl { index: usize },
2534 RegisteredEvent,
2535 }
2536
2537 // Tube keyed on the socket path used to create it.
2538 struct AddressedTube {
2539 tube: Rc<Tube>,
2540 socket_addr: String,
2541 }
2542
2543 impl PartialEq for AddressedTube {
2544 fn eq(&self, other: &Self) -> bool {
2545 self.socket_addr == other.socket_addr
2546 }
2547 }
2548
2549 impl Eq for AddressedTube {}
2550
2551 impl Hash for AddressedTube {
2552 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
2553 self.socket_addr.hash(state);
2554 }
2555 }
2556
2557 impl AddressedTube {
2558 pub fn send<T: Serialize>(&self, msg: &T) -> Result<(), base::TubeError> {
2559 self.tube.send(msg)
2560 }
2561 }
2562
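    /// Looks up an already-connected tube for `socket_addr` among the registered event
    /// tubes, and reports whether `event` is already registered for that address.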
2563 fn find_registered_tube<'a>(
2564 registered_tubes: &'a HashMap<RegisteredEvent, HashSet<AddressedTube>>,
2565 socket_addr: &str,
2566 event: RegisteredEvent,
2567 ) -> (Option<&'a Rc<Tube>>, bool) {
2568 let mut registered_tube: Option<&Rc<Tube>> = None;
2569 let mut already_registered = false;
2570 'outer: for (evt, addr_tubes) in registered_tubes {
2571 for addr_tube in addr_tubes {
2572 if addr_tube.socket_addr == socket_addr {
2573 if *evt == event {
2574 already_registered = true;
2575 break 'outer;
2576 }
2577                     // Since all tubes registered for the same addr are Rcs to the
2578                     // same underlying tube, it doesn't matter which one we pick. But
2579                     // we still need to check whether the current event is already
2580                     // registered for this addr, so we can't break out of the loop
2581                     // here.
2582 registered_tube = Some(&addr_tube.tube);
2583 }
2584 }
2585 }
2586 (registered_tube, already_registered)
2587 }
2588
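    /// Builds an `AddressedTube` for `addr`, reusing an existing tube if one is already
    /// registered for that address, or connecting a new seqpacket socket otherwise.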
2589 fn make_addr_tube_from_maybe_existing(
2590 tube: Option<&Rc<Tube>>,
2591 addr: String,
2592 ) -> Result<AddressedTube> {
2593 if let Some(registered_tube) = tube {
2594 Ok(AddressedTube {
2595 tube: registered_tube.clone(),
2596 socket_addr: addr,
2597 })
2598 } else {
2599 let sock = UnixSeqpacket::connect(addr.clone()).with_context(|| {
2600 format!("failed to connect to registered listening socket {}", addr)
2601 })?;
2602 let tube = Tube::new_from_unix_seqpacket(sock);
2603 Ok(AddressedTube {
2604 tube: Rc::new(tube),
2605 socket_addr: addr,
2606 })
2607 }
2608 }
2609
2610 let mut iommu_client = iommu_host_tube
2611 .as_ref()
2612 .map(VmMemoryRequestIommuClient::new);
2613
2614 stdin()
2615 .set_raw_mode()
2616 .expect("failed to set terminal raw mode");
2617
2618 let sys_allocator_mutex = Arc::new(Mutex::new(sys_allocator));
2619
2620 let wait_ctx = WaitContext::build_with(&[
2621 (&linux.suspend_evt, Token::Suspend),
2622 (&sigchld_fd, Token::ChildSignal),
2623 (&vm_evt_rdtube, Token::VmEvent),
2624         (&reg_evt_rdtube, Token::RegisteredEvent),
2625 ])
2626 .context("failed to build wait context")?;
2627
2628 if let Some(socket_server) = &control_server_socket {
2629 wait_ctx
2630 .add(socket_server, Token::VmControlServer)
2631 .context("failed to add descriptor to wait context")?;
2632 }
2633 for (index, socket) in control_tubes.iter().enumerate() {
2634 wait_ctx
2635 .add(socket.as_ref(), Token::VmControl { index })
2636 .context("failed to add descriptor to wait context")?;
2637 }
2638
2639 if cfg.jail_config.is_some() {
2640 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2641 drop_capabilities().context("failed to drop process capabilities")?;
2642 }
2643
2644 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
2645 // Create a channel for GDB thread.
2646 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
2647 let (s, r) = mpsc::channel();
2648 (Some(s), Some(r))
2649 } else {
2650 (None, None)
2651 };
2652
2653 let (device_ctrl_tube, device_ctrl_resp) = Tube::pair().context("failed to create tube")?;
2654 // Create devices thread, and restore if a restore file exists.
2655 linux.devices_thread = match create_devices_worker_thread(
2656 linux.vm.get_memory().clone(),
2657 linux.io_bus.clone(),
2658 linux.mmio_bus.clone(),
2659 device_ctrl_resp,
2660 ) {
2661 Ok(join_handle) => Some(join_handle),
2662 Err(e) => {
2663 return Err(anyhow!("Failed to start devices thread: {}", e));
2664 }
2665 };
2666
2667 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2668 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
2669 let use_hypervisor_signals = !linux
2670 .vm
2671 .get_hypervisor()
2672 .check_capability(HypervisorCap::ImmediateExit);
2673 vcpu::setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
2674
2675 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
2676 Some(vec) => vec.into_iter().map(Some).collect(),
2677 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2678 };
2679 // Enable core scheduling before creating vCPUs so that the cookie will be
2680 // shared by all vCPU threads.
2681 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
2682 // itself for even better performance. Only vCPUs need the feature.
2683 if cfg.core_scheduling && cfg.per_vm_core_scheduling {
2684 if let Err(e) = enable_core_scheduling() {
2685 error!("Failed to enable core scheduling: {}", e);
2686 }
2687 }
2688 let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
2689 None => None,
2690 Some(cgroup_path) => {
2691 // Move main process to cgroup_path
2692 let mut f = File::create(&cgroup_path.join("tasks")).with_context(|| {
2693 format!(
2694 "failed to create vcpu-cgroup-path {}",
2695 cgroup_path.display(),
2696 )
2697 })?;
2698 f.write_all(process::id().to_string().as_bytes())?;
2699 Some(f)
2700 }
2701 };
2702 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
2703 let bus_lock_ratelimit_ctrl: Arc<Mutex<Ratelimit>> = Arc::new(Mutex::new(Ratelimit::new()));
2704 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
2705 if cfg.bus_lock_ratelimit > 0 {
2706 let bus_lock_ratelimit = cfg.bus_lock_ratelimit;
2707 if linux.vm.check_capability(VmCap::BusLockDetect) {
2708             info!("Hypervisor supports bus lock detection");
2709 linux
2710 .vm
2711 .enable_capability(VmCap::BusLockDetect, 0)
2712 .expect("kvm: Failed to enable bus lock detection cap");
2713             info!("Hypervisor enabled bus lock detection");
2714 bus_lock_ratelimit_ctrl
2715 .lock()
2716 .ratelimit_set_speed(bus_lock_ratelimit);
2717 } else {
2718             bail!("Kvm: bus lock detection unsupported");
2719 }
2720 }
2721
2722 #[cfg(target_os = "android")]
2723 android::set_process_profiles(&cfg.task_profiles)?;
2724
2725 let guest_suspended_cvar = Arc::new((Mutex::new(false), Condvar::new()));
2726
2727 #[allow(unused_mut)]
2728 let mut run_mode = VmRunMode::Running;
2729 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
2730 if to_gdb_channel.is_some() {
2731 // Wait until a GDB client attaches
2732 run_mode = VmRunMode::Breakpoint;
2733 }
2734 // If we are restoring from a snapshot, then start suspended.
2735 let (run_mode, post_restore_run_mode) = if cfg.restore_path.is_some() {
2736 (VmRunMode::Suspending, run_mode)
2737 } else {
2738 (run_mode, run_mode)
2739 };
2740
2741 // Architecture-specific code must supply a vcpu_init element for each VCPU.
2742 assert_eq!(vcpus.len(), linux.vcpu_init.len());
2743
2744 for ((cpu_id, vcpu), vcpu_init) in vcpus.into_iter().enumerate().zip(linux.vcpu_init.drain(..))
2745 {
2746 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
2747 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2748 Some(VcpuAffinity::Global(v)) => v,
2749 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2750 None => Default::default(),
2751 };
2752
2753 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2754 let vcpu_hybrid_type = if !cfg.vcpu_hybrid_type.is_empty() {
2755 Some(*cfg.vcpu_hybrid_type.get(&cpu_id).unwrap())
2756 } else {
2757 None
2758 };
2759
2760 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2761 let cpu_config = Some(CpuConfigX86_64::new(
2762 cfg.force_calibrated_tsc_leaf,
2763 cfg.host_cpu_topology,
2764 cfg.enable_hwp,
2765 cfg.enable_pnp_data,
2766 cfg.no_smt,
2767 cfg.itmt,
2768 vcpu_hybrid_type,
2769 ));
2770 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
2771 let bus_lock_ratelimit_ctrl = Arc::clone(&bus_lock_ratelimit_ctrl);
2772
2773 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
2774 let cpu_config = None;
2775
2776 let handle = vcpu::run_vcpu(
2777 cpu_id,
2778 vcpu_ids[cpu_id],
2779 vcpu,
2780 vcpu_init,
2781 linux.vm.try_clone().context("failed to clone vm")?,
2782 linux
2783 .irq_chip
2784 .try_box_clone()
2785 .context("failed to clone irqchip")?,
2786 linux.vcpu_count,
2787 linux.rt_cpus.contains(&cpu_id),
2788 vcpu_affinity,
2789 linux.delay_rt,
2790 vcpu_thread_barrier.clone(),
2791 linux.has_bios,
2792 (*linux.io_bus).clone(),
2793 (*linux.mmio_bus).clone(),
2794 vm_evt_wrtube
2795 .try_clone()
2796 .context("failed to clone vm event tube")?,
2797 linux.vm.check_capability(VmCap::PvClockSuspend),
2798 from_main_channel,
2799 use_hypervisor_signals,
2800 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
2801 to_gdb_channel.clone(),
2802 cfg.core_scheduling,
2803 cfg.per_vm_core_scheduling,
2804 cpu_config,
2805 cfg.privileged_vm,
2806 match vcpu_cgroup_tasks_file {
2807 None => None,
2808 Some(ref f) => Some(
2809 f.try_clone()
2810 .context("failed to clone vcpu cgroup tasks file")?,
2811 ),
2812 },
2813 cfg.userspace_msr.clone(),
2814 guest_suspended_cvar.clone(),
2815 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
2816 bus_lock_ratelimit_ctrl,
2817 run_mode,
2818 )?;
2819 vcpu_handles.push((handle, to_vcpu_channel));
2820 }
2821
2822 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
2823 // Spawn GDB thread.
2824 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
2825 let to_vcpu_channels = vcpu_handles
2826 .iter()
2827 .map(|(_handle, channel)| channel.clone())
2828 .collect();
2829 let target = GdbStub::new(
2830 gdb_control_tube,
2831 to_vcpu_channels,
2832             from_vcpu_channel.unwrap(), // Guaranteed to be Some because linux.gdb is Some.
2833 );
2834 std::thread::Builder::new()
2835 .name("gdb".to_owned())
2836 .spawn(move || gdb_thread(target, gdb_port_num))
2837 .context("failed to spawn GDB thread")?;
2838 };
2839
2840 let (irq_handler_control, irq_handler_control_for_thread) = Tube::pair()?;
2841 let sys_allocator_for_thread = sys_allocator_mutex.clone();
2842 let irq_chip_for_thread = linux.irq_chip.try_box_clone()?;
2843 let irq_handler_thread = std::thread::Builder::new()
2844 .name("irq_handler_thread".into())
2845 .spawn(move || {
2846 irq_handler_thread(
2847 irq_control_tubes,
2848 irq_chip_for_thread,
2849 sys_allocator_for_thread,
2850 irq_handler_control_for_thread,
2851 )
2852 })
2853 .unwrap();
2854
2855 vcpu_thread_barrier.wait();
2856
2857 // Restore VM (if applicable).
2858 // Must happen after the vCPU barrier to avoid deadlock.
2859 if let Some(path) = &cfg.restore_path {
2860 vm_control::do_restore(
2861 path.clone(),
2862 |msg| vcpu::kick_all_vcpus(&vcpu_handles, linux.irq_chip.as_irq_chip(), msg),
2863 |msg, index| {
2864 vcpu::kick_vcpu(&vcpu_handles.get(index), linux.irq_chip.as_irq_chip(), msg)
2865 },
2866 &device_ctrl_tube,
2867 linux.vcpu_count,
2868 )?;
2869 // Allow the vCPUs to start for real.
2870 vcpu::kick_all_vcpus(
2871 &vcpu_handles,
2872 linux.irq_chip.as_irq_chip(),
2873 VcpuControl::RunState(post_restore_run_mode),
2874 )
2875 }
2876
2877 let mut exit_state = ExitState::Stop;
2878 let mut pvpanic_code = PvPanicCode::Unknown;
2879 #[cfg(feature = "balloon")]
2880 let mut balloon_stats_id: u64 = 0;
2881 #[cfg(feature = "balloon")]
2882 let mut balloon_wss_id: u64 = 0;
2883 let mut registered_evt_tubes: HashMap<RegisteredEvent, HashSet<AddressedTube>> = HashMap::new();
2884
2885 'wait: loop {
2886 let events = {
2887 match wait_ctx.wait() {
2888 Ok(v) => v,
2889 Err(e) => {
2890 error!("failed to poll: {}", e);
2891 break;
2892 }
2893 }
2894 };
2895
2896 let mut vm_control_indices_to_remove = Vec::new();
2897 for event in events.iter().filter(|e| e.is_readable) {
2898 match event.token {
2899 Token::RegisteredEvent => match reg_evt_rdtube.recv::<RegisteredEvent>() {
2900 Ok(reg_evt) => {
2901 let mut tubes_to_remove: Vec<String> = Vec::new();
2902                         if let Some(tubes) = registered_evt_tubes.get_mut(&reg_evt) {
2903 for tube in tubes.iter() {
2904                                 if let Err(e) = tube.send(&reg_evt) {
2905 warn!(
2906 "failed to send registered event {:?} to {}, removing from \
2907 registrations: {}",
2908 reg_evt, tube.socket_addr, e
2909 );
2910 tubes_to_remove.push(tube.socket_addr.clone());
2911 }
2912 }
2913 }
2914 for tube_addr in tubes_to_remove {
2915 for tubes in registered_evt_tubes.values_mut() {
2916 tubes.retain(|t| t.socket_addr != tube_addr);
2917 }
2918 }
2919 registered_evt_tubes.retain(|_, tubes| !tubes.is_empty());
2920 }
2921 Err(e) => {
2922 warn!("failed to recv RegisteredEvent: {}", e);
2923 }
2924 },
2925 Token::VmEvent => {
2926 let mut break_to_wait: bool = true;
2927 match vm_evt_rdtube.recv::<VmEventType>() {
2928 Ok(vm_event) => match vm_event {
2929 VmEventType::Exit => {
2930 info!("vcpu requested shutdown");
2931 exit_state = ExitState::Stop;
2932 }
2933 VmEventType::Reset => {
2934 info!("vcpu requested reset");
2935 exit_state = ExitState::Reset;
2936 }
2937 VmEventType::Crash => {
2938 info!("vcpu crashed");
2939 exit_state = ExitState::Crash;
2940 }
2941 VmEventType::Panic(panic_code) => {
2942 pvpanic_code = PvPanicCode::from_u8(panic_code);
2943 info!("Guest reported panic [Code: {}]", pvpanic_code);
2944 break_to_wait = false;
2945 }
2946 VmEventType::WatchdogReset => {
2947 info!("vcpu stall detected");
2948 exit_state = ExitState::WatchdogReset;
2949 }
2950 },
2951 Err(e) => {
2952 warn!("failed to recv VmEvent: {}", e);
2953 }
2954 }
2955 if break_to_wait {
2956 if pvpanic_code == PvPanicCode::Panicked {
2957 exit_state = ExitState::GuestPanic;
2958 }
2959 break 'wait;
2960 }
2961 }
2962 Token::Suspend => {
2963 info!("VM requested suspend");
2964 linux.suspend_evt.wait().unwrap();
2965 vcpu::kick_all_vcpus(
2966 &vcpu_handles,
2967 linux.irq_chip.as_irq_chip(),
2968 VcpuControl::RunState(VmRunMode::Suspending),
2969 );
2970 }
2971 Token::ChildSignal => {
2972                     // Print all available siginfo structs, then exit the loop if a child process
2973                     // has exited. CLD_STOPPED and CLD_CONTINUED are ignored here since they are
2974                     // used by the vmm-swap feature.
2975 let mut do_exit = false;
2976 while let Some(siginfo) =
2977 sigchld_fd.read().context("failed to create signalfd")?
2978 {
2979 let pid = siginfo.ssi_pid;
2980 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2981 Some(label) => format!("{} (pid {})", label, pid),
2982 None => format!("pid {}", pid),
2983 };
2984
2985                         // TODO(kawasin): this is a temporary exception until device suspension is implemented.
2986 #[cfg(feature = "swap")]
2987 if siginfo.ssi_code == libc::CLD_STOPPED
2988 || siginfo.ssi_code == libc::CLD_CONTINUED
2989 {
2990 continue;
2991 }
2992
2993 error!(
2994 "child {} exited: signo {}, status {}, code {}",
2995 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
2996 );
2997 do_exit = true;
2998 }
2999 if do_exit {
3000 exit_state = ExitState::Crash;
3001 break 'wait;
3002 }
3003 }
3004 Token::VmControlServer => {
3005 if let Some(socket_server) = &control_server_socket {
3006 match socket_server.accept() {
3007 Ok(socket) => {
3008 wait_ctx
3009 .add(
3010 &socket,
3011 Token::VmControl {
3012 index: control_tubes.len(),
3013 },
3014 )
3015 .context("failed to add descriptor to wait context")?;
3016 control_tubes.push(TaggedControlTube::Vm(
3017 Tube::new_from_unix_seqpacket(socket),
3018 ));
3019 }
3020 Err(e) => error!("failed to accept socket: {}", e),
3021 }
3022 }
3023 }
3024 Token::VmControl { index } => {
3025 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3026 let mut add_tubes = Vec::new();
3027 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3028 let mut add_irq_control_tubes = Vec::new();
3029 if let Some(socket) = control_tubes.get(index) {
3030 match socket {
3031 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
3032 Ok(request) => {
3033 let mut suspend_requested = false;
3034 let mut run_mode_opt = None;
3035 let response = match request {
3036 VmRequest::HotPlugCommand { device, add } => {
3037 #[cfg(any(
3038 target_arch = "x86",
3039 target_arch = "x86_64"
3040 ))]
3041 {
3042 handle_hotplug_command(
3043 &mut linux,
3044 &mut sys_allocator_mutex.lock(),
3045 &cfg,
3046 &mut add_irq_control_tubes,
3047 &mut add_tubes,
3048 &hp_control_tube,
3049 &iommu_host_tube,
3050 &device,
3051 add,
3052 #[cfg(feature = "swap")]
3053 swap_controller.as_ref(),
3054 )
3055 }
3056
3057 #[cfg(not(any(
3058 target_arch = "x86",
3059 target_arch = "x86_64"
3060 )))]
3061 {
3062 // Suppress warnings.
3063 let _ = (device, add);
3064 VmResponse::Ok
3065 }
3066 }
3067 VmRequest::RegisterListener { socket_addr, event } => {
3068 let (registered_tube, already_registered) =
3069 find_registered_tube(
3070                                                     &registered_evt_tubes,
3071 &socket_addr,
3072 event,
3073 );
3074
3075 if !already_registered {
3076 let addr_tube = make_addr_tube_from_maybe_existing(
3077 registered_tube,
3078 socket_addr,
3079 )?;
3080
3081 if let Some(tubes) =
3082 registered_evt_tubes.get_mut(&event)
3083 {
3084 tubes.insert(addr_tube);
3085 } else {
3086 registered_evt_tubes.insert(
3087 event,
3088 vec![addr_tube].into_iter().collect(),
3089 );
3090 }
3091 }
3092 VmResponse::Ok
3093 }
3094 VmRequest::UnregisterListener { socket_addr, event } => {
3095 if let Some(tubes) =
3096 registered_evt_tubes.get_mut(&event)
3097 {
3098 tubes.retain(|t| t.socket_addr != socket_addr);
3099 }
3100 registered_evt_tubes
3101 .retain(|_, tubes| !tubes.is_empty());
3102 VmResponse::Ok
3103 }
3104 VmRequest::Unregister { socket_addr } => {
3105 for (_, tubes) in registered_evt_tubes.iter_mut() {
3106 tubes.retain(|t| t.socket_addr != socket_addr);
3107 }
3108 registered_evt_tubes
3109 .retain(|_, tubes| !tubes.is_empty());
3110 VmResponse::Ok
3111 }
3112 _ => {
3113 let response = request.execute(
3114 &mut run_mode_opt,
3115 #[cfg(feature = "balloon")]
3116 balloon_host_tube.as_ref(),
3117 #[cfg(feature = "balloon")]
3118 balloon_wss_host_tube.as_ref(),
3119 #[cfg(feature = "balloon")]
3120 &mut balloon_stats_id,
3121 #[cfg(feature = "balloon")]
3122 &mut balloon_wss_id,
3123 disk_host_tubes,
3124 &mut linux.pm,
3125 #[cfg(feature = "gpu")]
3126 &gpu_control_tube,
3127 #[cfg(feature = "usb")]
3128 Some(&usb_control_tube),
3129 #[cfg(not(feature = "usb"))]
3130 None,
3131 &mut linux.bat_control,
3132 |msg| {
3133 vcpu::kick_all_vcpus(
3134 &vcpu_handles,
3135 linux.irq_chip.as_irq_chip(),
3136 msg,
3137 )
3138 },
3139 |msg, index| {
3140 vcpu::kick_vcpu(
3141 &vcpu_handles.get(index),
3142 linux.irq_chip.as_irq_chip(),
3143 msg,
3144 )
3145 },
3146 cfg.force_s2idle,
3147 #[cfg(feature = "swap")]
3148 swap_controller.as_ref(),
3149 &device_ctrl_tube,
3150 vcpu_handles.len(),
3151 &irq_handler_control,
3152 );
3153
3154                                             // For non-s2idle guest suspension, we are done here.
3155 if let VmRequest::Suspend = request {
3156 if cfg.force_s2idle {
3157 suspend_requested = true;
3158
3159 // Spawn s2idle wait thread.
3160 let send_tube =
3161 tube.try_clone_send_tube().unwrap();
3162 let suspend_evt =
3163 linux.suspend_evt.try_clone().unwrap();
3164 let guest_suspended_cvar =
3165 guest_suspended_cvar.clone();
3166 let delayed_response = response.clone();
3167 let pm = linux.pm.clone();
3168
3169 std::thread::Builder::new()
3170 .name("s2idle_wait".to_owned())
3171 .spawn(move || {
3172 trigger_vm_suspend_and_wait_for_entry(
3173 guest_suspended_cvar,
3174 &send_tube,
3175 delayed_response,
3176 suspend_evt,
3177 pm,
3178 )
3179 })
3180 .context(
3181 "failed to spawn s2idle_wait thread",
3182 )?;
3183 }
3184 }
3185 response
3186 }
3187 };
3188
3189                                     // If suspend was requested, skip sending the response here;
3190                                     // the s2idle_wait thread will send it when the suspension
3191                                     // actually happens.
3192 if !suspend_requested {
3193 if let Err(e) = tube.send(&response) {
3194 error!("failed to send VmResponse: {}", e);
3195 }
3196 }
3197
3198 if let Some(run_mode) = run_mode_opt {
3199 info!("control socket changed run mode to {}", run_mode);
3200 match run_mode {
3201 VmRunMode::Exiting => {
3202 break 'wait;
3203 }
3204 other => {
3205 if other == VmRunMode::Running {
3206 for dev in &linux.resume_notify_devices {
3207 dev.lock().resume_imminent();
3208 }
3209 }
3210                                                 // If suspend was requested, skip this step
3211                                                 // since it will be performed by the s2idle_wait
3212                                                 // thread when needed.
3213 if !suspend_requested {
3214 vcpu::kick_all_vcpus(
3215 &vcpu_handles,
3216 linux.irq_chip.as_irq_chip(),
3217 VcpuControl::RunState(other),
3218 );
3219 }
3220 }
3221 }
3222 }
3223 }
3224 Err(e) => {
3225 if let TubeError::Disconnected = e {
3226 vm_control_indices_to_remove.push(index);
3227 } else {
3228 error!("failed to recv VmRequest: {}", e);
3229 }
3230 }
3231 },
3232 TaggedControlTube::VmMemory {
3233 tube,
3234 expose_with_viommu,
3235 } => match tube.recv::<VmMemoryRequest>() {
3236 Ok(request) => {
3237 let response = request.execute(
3238 &mut linux.vm,
3239 &mut sys_allocator_mutex.lock(),
3240 &mut gralloc,
3241 if *expose_with_viommu {
3242 iommu_client.as_mut()
3243 } else {
3244 None
3245 },
3246 );
3247 if let Err(e) = tube.send(&response) {
3248 error!("failed to send VmMemoryControlResponse: {}", e);
3249 }
3250 }
3251 Err(e) => {
3252 if let TubeError::Disconnected = e {
3253 vm_control_indices_to_remove.push(index);
3254 } else {
3255 error!("failed to recv VmMemoryControlRequest: {}", e);
3256 }
3257 }
3258 },
3259 TaggedControlTube::VmMsync(tube) => {
3260 match tube.recv::<VmMsyncRequest>() {
3261 Ok(request) => {
3262 let response = request.execute(&mut linux.vm);
3263 if let Err(e) = tube.send(&response) {
3264 error!("failed to send VmMsyncResponse: {}", e);
3265 }
3266 }
3267 Err(e) => {
3268 if let TubeError::Disconnected = e {
3269 vm_control_indices_to_remove.push(index);
3270 } else {
3271 error!("failed to recv VmMsyncRequest: {}", e);
3272 }
3273 }
3274 }
3275 }
3276 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
3277 Ok(request) => {
3278 let response = request
3279 .execute(&mut linux.vm, &mut sys_allocator_mutex.lock());
3280 if let Err(e) = tube.send(&response) {
3281 error!("failed to send VmResponse: {}", e);
3282 }
3283 }
3284 Err(e) => {
3285 if let TubeError::Disconnected = e {
3286 vm_control_indices_to_remove.push(index);
3287 } else {
3288 error!("failed to recv VmResponse: {}", e);
3289 }
3290 }
3291 },
3292 }
3293 }
3294 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3295 if !add_tubes.is_empty() {
3296 for (idx, socket) in add_tubes.iter().enumerate() {
3297 wait_ctx
3298 .add(
3299 socket.as_ref(),
3300 Token::VmControl {
3301 index: idx + control_tubes.len(),
3302 },
3303 )
3304 .context(
3305 "failed to add hotplug vfio-pci descriptor to wait context",
3306 )?;
3307 }
3308 control_tubes.append(&mut add_tubes);
3309 }
3310 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3311 if !add_irq_control_tubes.is_empty() {
3312 irq_handler_control.send(&IrqHandlerRequest::AddIrqControlTubes(
3313 add_irq_control_tubes,
3314 ))?;
3315 }
3316 }
3317 }
3318 }
3319
3320 remove_hungup_and_drained_tubes(
3321 &events,
3322 &wait_ctx,
3323 &mut control_tubes,
3324 vm_control_indices_to_remove,
3325 |token: &Token| {
3326 if let Token::VmControl { index } = token {
3327 return Some(*index);
3328 }
3329 None
3330 },
3331 |index: usize| Token::VmControl { index },
3332 )?;
3333 }
3334
3335 vcpu::kick_all_vcpus(
3336 &vcpu_handles,
3337 linux.irq_chip.as_irq_chip(),
3338 VcpuControl::RunState(VmRunMode::Exiting),
3339 );
3340 for (handle, _) in vcpu_handles {
3341 if let Err(e) = handle.join() {
3342 error!("failed to join vcpu thread: {:?}", e);
3343 }
3344 }
3345
3346 #[cfg(feature = "swap")]
3347 // Stop the snapshot monitor process
3348 if let Some(swap_controller) = swap_controller {
3349 if let Err(e) = swap_controller.exit() {
3350 error!("failed to exit snapshot monitor process: {:?}", e);
3351 }
3352 }
3353
3354 // Stop pci root worker thread
3355 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3356 {
3357 let _ = hp_control_tube.send(PciRootCommand::Kill);
3358 if let Err(e) = hp_thread.join() {
3359 error!("failed to join hotplug thread: {:?}", e);
3360 }
3361 }
3362
3363 if linux.devices_thread.is_some() {
3364 if let Err(e) = device_ctrl_tube.send(&DeviceControlCommand::Exit) {
3365 error!("failed to stop device control loop: {}", e);
3366 };
3367 if let Some(thread) = linux.devices_thread.take() {
3368 if let Err(e) = thread.join() {
3369 error!("failed to exit devices thread: {:?}", e);
3370 }
3371 }
3372 }
3373
3374 // Shut down the IRQ handler thread.
3375 if let Err(e) = irq_handler_control.send(&IrqHandlerRequest::Exit) {
3376 error!("failed to request exit from IRQ handler thread: {}", e);
3377 }
3378 if let Err(e) = irq_handler_thread.join() {
3379 error!("failed to exit irq handler thread: {:?}", e);
3380 }
3381
3382 // At this point, the only remaining `Arc` references to the `Bus` objects should be the ones
3383 // inside `linux`. If the checks below fail, then some other thread is probably still running
3384 // and needs to be explicitly stopped before dropping `linux` to ensure devices actually get
3385 // cleaned up.
3386 match Arc::try_unwrap(std::mem::replace(&mut linux.mmio_bus, Arc::new(Bus::new()))) {
3387 Ok(_) => {}
3388 Err(_) => panic!("internal error: mmio_bus had more than one reference at shutdown"),
3389 }
3390 match Arc::try_unwrap(std::mem::replace(&mut linux.io_bus, Arc::new(Bus::new()))) {
3391 Ok(_) => {}
3392 Err(_) => panic!("internal error: io_bus had more than one reference at shutdown"),
3393 }
3394
3395 // Explicitly drop the VM structure here to allow the devices to clean up before the
3396 // control sockets are closed when this function exits.
3397 mem::drop(linux);
3398
3399 stdin()
3400 .set_canon_mode()
3401 .expect("failed to restore canonical mode for terminal");
3402
3403 Ok(exit_state)
3404 }
3405
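/// Tokens used by the IRQ handler thread to distinguish events on its `WaitContext`.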
3406 #[derive(EventToken)]
3407 enum IrqHandlerToken {
3408 IrqFd { index: IrqEventIndex },
3409 VmIrq { index: usize },
3410 DelayedIrqFd,
3411 HandlerControl,
3412 }
3413
3414 /// Handles IRQs and requests from devices to add additional IRQ lines.
3415 fn irq_handler_thread(
3416 mut irq_control_tubes: Vec<Tube>,
3417 mut irq_chip: Box<dyn IrqChipArch + 'static>,
3418 sys_allocator_mutex: Arc<Mutex<SystemAllocator>>,
3419 handler_control: Tube,
3420 ) -> anyhow::Result<()> {
3421 let wait_ctx = WaitContext::build_with(&[(
3422 handler_control.get_read_notifier(),
3423 IrqHandlerToken::HandlerControl,
3424 )])
3425 .context("failed to build wait context")?;
3426
3427 if let Some(delayed_ioapic_irq_trigger) = irq_chip.irq_delayed_event_token()? {
3428 wait_ctx
3429 .add(&delayed_ioapic_irq_trigger, IrqHandlerToken::DelayedIrqFd)
3430 .context("failed to add descriptor to wait context")?;
3431 }
3432
3433 let events = irq_chip
3434 .irq_event_tokens()
3435         .context("failed to get event tokens from irqchip")?;
3436
3437 for (index, _gsi, evt) in events {
3438 wait_ctx
3439 .add(&evt, IrqHandlerToken::IrqFd { index })
3440 .context("failed to add irq chip event tokens to wait context")?;
3441 }
3442
3443 for (index, socket) in irq_control_tubes.iter().enumerate() {
3444 wait_ctx
3445 .add(socket.get_read_notifier(), IrqHandlerToken::VmIrq { index })
3446             .context("failed to add irq control tubes to wait context")?;
3447 }
3448
3449 'wait: loop {
3450 let events = {
3451 match wait_ctx.wait() {
3452 Ok(v) => v,
3453 Err(e) => {
3454 error!("failed to poll: {}", e);
3455 break 'wait;
3456 }
3457 }
3458 };
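        // Number of events handled in this iteration; reported back via
        // `HandlerIterationComplete` when a `WakeAndNotifyIteration` request is pending.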
3459 let token_count = events.len();
3460 let mut vm_irq_tubes_to_remove = Vec::new();
3461 let mut notify_control_on_iteration_end = false;
3462
3463 for event in events.iter().filter(|e| e.is_readable) {
3464 match event.token {
3465 IrqHandlerToken::HandlerControl => {
3466 match handler_control.recv::<IrqHandlerRequest>() {
3467 Ok(request) => {
3468 match request {
3469 IrqHandlerRequest::Exit => break 'wait,
3470 IrqHandlerRequest::AddIrqControlTubes(mut tubes) => {
3471 for (index, socket) in tubes.iter().enumerate() {
3472 wait_ctx
3473 .add(
3474 socket.get_read_notifier(),
3475 IrqHandlerToken::VmIrq {
3476 index: irq_control_tubes.len() + index,
3477 },
3478 )
3479 .context("failed to add new IRQ control Tube to wait context")?;
3480 }
3481 irq_control_tubes.append(&mut tubes);
3482 }
3483 IrqHandlerRequest::WakeAndNotifyIteration => {
3484 notify_control_on_iteration_end = true;
3485 }
3486 }
3487 }
3488 Err(e) => {
3489 if let TubeError::Disconnected = e {
3490 panic!("irq handler control tube disconnected.");
3491 } else {
3492 error!("failed to recv IrqHandlerRequest: {}", e);
3493 }
3494 }
3495 }
3496 }
3497 IrqHandlerToken::VmIrq { index } => {
3498 if let Some(tube) = irq_control_tubes.get(index) {
3499 handle_irq_tube_request(
3500 &sys_allocator_mutex,
3501 &mut irq_chip,
3502 &mut vm_irq_tubes_to_remove,
3503 &wait_ctx,
3504 tube,
3505 index,
3506 );
3507 }
3508 }
3509 IrqHandlerToken::IrqFd { index } => {
3510 if let Err(e) = irq_chip.service_irq_event(index) {
3511 error!("failed to signal irq {}: {}", index, e);
3512 }
3513 }
3514 IrqHandlerToken::DelayedIrqFd => {
3515 if let Err(e) = irq_chip.process_delayed_irq_events() {
3516 warn!("can't deliver delayed irqs: {}", e);
3517 }
3518 }
3519 }
3520 }
3521
3522 if notify_control_on_iteration_end {
3523 if let Err(e) = handler_control.send(&IrqHandlerResponse::HandlerIterationComplete(
3524 token_count - 1,
3525 )) {
3526 error!(
3527 "failed to notify on iteration completion (snapshotting may fail): {}",
3528 e
3529 );
3530 }
3531 }
3532
3533 remove_hungup_and_drained_tubes(
3534 &events,
3535 &wait_ctx,
3536 &mut irq_control_tubes,
3537 vm_irq_tubes_to_remove,
3538 |token: &IrqHandlerToken| {
3539 if let IrqHandlerToken::VmIrq { index } = token {
3540 return Some(*index);
3541 }
3542 None
3543 },
3544 |index: usize| IrqHandlerToken::VmIrq { index },
3545 )?;
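        // If the handler control tube hung up without an explicit Exit request, no further
        // commands can arrive, so stop the IRQ handler loop.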
3546 if events.iter().any(|e| {
3547 e.is_hungup && !e.is_readable && matches!(e.token, IrqHandlerToken::HandlerControl)
3548 }) {
3549 error!("IRQ handler control hung up but did not request an exit.");
3550 break 'wait;
3551 }
3552 }
3553 Ok(())
3554 }
3555
3556 fn handle_irq_tube_request(
3557 sys_allocator_mutex: &Arc<Mutex<SystemAllocator>>,
3558 irq_chip: &mut Box<dyn IrqChipArch + 'static>,
3559 vm_irq_tubes_to_remove: &mut Vec<usize>,
3560 wait_ctx: &WaitContext<IrqHandlerToken>,
3561 tube: &Tube,
3562 tube_index: usize,
3563 ) {
3564 match tube.recv::<VmIrqRequest>() {
3565 Ok(request) => {
3566 let response = {
3567 request.execute(
3568 |setup| match setup {
3569 IrqSetup::Event(irq, ev, device_id, queue_id, device_name) => {
3570 let irq_evt = devices::IrqEdgeEvent::from_event(ev.try_clone()?);
3571 let source = IrqEventSource {
3572 device_id: device_id.try_into().expect("Invalid device_id"),
3573 queue_id,
3574 device_name,
3575 };
3576 if let Some(event_index) =
3577 irq_chip.register_edge_irq_event(irq, &irq_evt, source)?
3578 {
3579 if let Err(e) =
3580 wait_ctx.add(ev, IrqHandlerToken::IrqFd { index: event_index })
3581 {
3582 warn!("failed to add IrqFd to poll context: {}", e);
3583 return Err(e);
3584 }
3585 }
3586 Ok(())
3587 }
3588 IrqSetup::Route(route) => irq_chip.route_irq(route),
3589 IrqSetup::UnRegister(irq, ev) => {
3590 let irq_evt = devices::IrqEdgeEvent::from_event(ev.try_clone()?);
3591 irq_chip.unregister_edge_irq_event(irq, &irq_evt)
3592 }
3593 },
3594 &mut sys_allocator_mutex.lock(),
3595 )
3596 };
3597 if let Err(e) = tube.send(&response) {
3598 error!("failed to send VmIrqResponse: {}", e);
3599 }
3600 }
3601 Err(e) => {
3602 if let TubeError::Disconnected = e {
3603 vm_irq_tubes_to_remove.push(tube_index);
3604 } else {
3605 error!("failed to recv VmIrqRequest: {}", e);
3606 }
3607 }
3608 }
3609 }
3610
3611 /// When control tubes hang up, we want to make sure that we've fully drained
3612 /// the underlying socket before removing it. This function also handles
3613 /// removing closed sockets in such a way that avoids phantom events.
3614 ///
3615 /// `tube_indices_to_remove` is the set of indices that we already know should
3616 /// be removed (e.g. from getting a disconnect error on read).
3617 fn remove_hungup_and_drained_tubes<T, U>(
3618 events: &SmallVec<[TriggeredEvent<T>; 16]>,
3619 wait_ctx: &WaitContext<T>,
3620 tubes: &mut Vec<U>,
3621 mut tube_indices_to_remove: Vec<usize>,
3622 get_tube_index: fn(token: &T) -> Option<usize>,
3623 make_token_for_tube: fn(usize) -> T,
3624 ) -> anyhow::Result<()>
3625 where
3626 T: EventToken,
3627 U: ReadNotifier,
3628 {
3629 // It's possible more data is readable and buffered while the socket is hungup,
3630 // so don't delete the tube from the poll context until we're sure all the
3631 // data is read.
3632     // The case below covers a condition where we have received a hungup event and the tube
3633     // is not readable.
3634     // For a readable tube, once all data is read, any attempt to read more data from the
3635     // hungup tube should fail. On such a failure, we get a Disconnected error and the index
3636     // gets added to `tube_indices_to_remove` by the time we reach here.
3637 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
3638 if let Some(index) = get_tube_index(&event.token) {
3639 tube_indices_to_remove.push(index);
3640 }
3641 }
3642
3643 // Sort in reverse so the highest indexes are removed first. This removal algorithm
3644 // preserves correct indexes as each element is removed.
3645 tube_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
3646 tube_indices_to_remove.dedup();
3647 for index in tube_indices_to_remove {
3648 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3649 // this automatically when the FD inserted into the `wait_ctx` is closed after this
3650 // if-block, but this removal can be deferred unpredictably. In some instances where the
3651 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
3652 // that has already been closed. Because the token associated with that spurious event
3653 // now belongs to a different socket, the control loop will start to interact with
3654 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3655 // blocking on a socket that will never be ready. See also: crbug.com/1019986
3656 if let Some(socket) = tubes.get(index) {
3657 wait_ctx
3658 .delete(socket.get_read_notifier())
3659 .context("failed to remove descriptor from wait context")?;
3660 }
3661
3662 // This line implicitly drops the socket at `index` when it gets returned by
3663 // `swap_remove`. After this line, the socket at `index` is not the one from
3664 // `tube_indices_to_remove`. Because of this socket's change in index, we need to
3665 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
3666 tubes.swap_remove(index);
3667 if let Some(tube) = tubes.get(index) {
3668 wait_ctx
3669 .modify(
3670 tube.get_read_notifier(),
3671 EventType::Read,
3672 make_token_for_tube(index),
3673 )
3674 .context("failed to add descriptor to wait context")?;
3675 }
3676 }
3677 Ok(())
3678 }
3679
3680 /// Start and jail a vhost-user device according to its configuration and a vhost listener string.
3681 ///
3682 /// The jailing business is nasty and potentially unsafe if done from the wrong context - do not
3683 /// call outside of `start_devices`!
3684 ///
3685 /// Returns the pid of the jailed device process.
3686 fn jail_and_start_vu_device<T: VirtioDeviceBuilder>(
3687 jail_config: &Option<JailConfig>,
3688 params: T,
3689 vhost: &str,
3690 name: &str,
3691 ) -> anyhow::Result<(libc::pid_t, Option<Box<dyn std::any::Any>>)> {
3692 let mut keep_rds = Vec::new();
3693
3694 base::syslog::push_descriptors(&mut keep_rds);
3695 cros_tracing::push_descriptors!(&mut keep_rds);
3696
3697 let jail_type = VhostUserListener::get_virtio_transport_type(vhost);
3698
3699 // Create a jail from the configuration. If the configuration is `None`, `create_jail` will also
3700     // return `None`, so we fall back to an empty (i.e. non-constrained) Minijail.
3701 let jail = params
3702 .create_jail(jail_config, jail_type)
3703 .with_context(|| format!("failed to create jail for {}", name))?
3704 .ok_or(())
3705 .or_else(|_| Minijail::new())
3706 .with_context(|| format!("failed to create empty jail for {}", name))?;
3707
3708 // Create the device in the parent process, so the child does not need any privileges necessary
3709 // to do it (only runtime capabilities are required).
3710 let device = params
3711 .create_vhost_user_device(&mut keep_rds)
3712 .context("failed to create vhost-user device")?;
3713 let mut listener = VhostUserListener::new(vhost, device.max_queue_num(), Some(&mut keep_rds))
3714 .context("failed to create the vhost listener")?;
3715 let parent_resources = listener.take_parent_process_resources();
3716
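    // Capture TZ before forking; the child re-exports it below so `chrono::Local` keeps working.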
3717 let tz = std::env::var("TZ").unwrap_or_default();
3718
3719 // Executor must be created before jail in order to prevent the jailed process from creating
3720 // unrestricted io_urings.
3721 let ex = Executor::with_executor_kind(device.executor_kind().unwrap_or_default())
3722 .context("Failed to create an Executor")?;
3723 keep_rds.extend(ex.as_raw_descriptors());
3724
3725 // Deduplicate the FDs since minijail expects them to be unique.
3726 keep_rds.sort_unstable();
3727 keep_rds.dedup();
3728
3729 // Safe because we are keeping all the descriptors needed for the child to function.
3730 match unsafe { jail.fork(Some(&keep_rds)).context("error while forking")? } {
3731 0 => {
3732 // In the child process.
3733
3734 // Free memory for the resources managed by the parent, without running drop() on them.
3735 // The parent will do it as we exit.
3736 let _ = std::mem::ManuallyDrop::new(parent_resources);
3737
3738 // Make sure the child process does not survive its parent.
3739 if unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) } < 0 {
3740                 panic!("call to prctl(PR_SET_PDEATHSIG, SIGKILL) failed. Aborting child process.");
3741 }
3742
3743 // Set the name for the thread.
3744 const MAX_LEN: usize = 15; // pthread_setname_np() limit on Linux
3745 let debug_label_trimmed = &name.as_bytes()[..std::cmp::min(MAX_LEN, name.len())];
3746 let thread_name = CString::new(debug_label_trimmed).unwrap();
3747 // Safe because we trimmed the name to 15 characters (and pthread_setname_np will return
3748 // an error if we don't anyway).
3749 let _ = unsafe { libc::pthread_setname_np(libc::pthread_self(), thread_name.as_ptr()) };
3750
3751 // Preserve TZ for `chrono::Local` (b/257987535).
3752 std::env::set_var("TZ", tz);
3753
3754 // Run the device loop and terminate the child process once it exits.
3755 let res = match listener.run_device(ex, device) {
3756 Ok(()) => 0,
3757 Err(e) => {
3758 error!("error while running device {}: {:#}", name, e);
3759 1
3760 }
3761 };
3762 unsafe { libc::exit(res) };
3763 }
3764 pid => {
3765 // In the parent process. We will drop the device and listener when exiting this method.
3766 // This is fine as ownership for both has been transferred to the child process and they
3767 // will keep living there. We just retain `parent_resources` for things we are supposed
3768 // to clean up ourselves.
3769
3770 info!("process for device {} (PID {}) started", &name, pid);
3771 #[cfg(feature = "seccomp_trace")]
3772 debug!(
3773 "seccomp_trace {{\"event\": \"minijail_fork\", \"pid\": {}, \"name\": \"{}\", \"jail_addr\": \"0x{:x}\"}}",
3774 pid,
3775 &name,
3776 read_jail_addr(&jail)
3777 );
3778 Ok((pid, parent_resources))
3779 }
3780 }
3781 }
3782
3783 fn process_vhost_user_control_request(tube: Tube, disk_host_tubes: &[Tube]) -> Result<()> {
3784 let command = tube
3785 .recv::<VmRequest>()
3786 .context("failed to receive VmRequest")?;
3787 let resp = match command {
3788 VmRequest::DiskCommand {
3789 disk_index,
3790 ref command,
3791 } => match &disk_host_tubes.get(disk_index) {
3792 Some(tube) => handle_disk_command(command, tube),
3793 None => VmResponse::Err(base::Error::new(libc::ENODEV)),
3794 },
3795 request => {
3796 error!(
3797 "Request {:?} currently not supported in vhost user backend",
3798 request
3799 );
3800 VmResponse::Err(base::Error::new(libc::EPERM))
3801 }
3802 };
3803
3804 tube.send(&resp).context("failed to send VmResponse")?;
3805 Ok(())
3806 }
3807
3808 fn start_vhost_user_control_server(
3809 control_server_socket: UnlinkUnixSeqpacketListener,
3810 disk_host_tubes: Vec<Tube>,
3811 ) {
3812 info!("Start vhost-user control server");
3813 loop {
3814 match control_server_socket.accept() {
3815 Ok(socket) => {
3816 let tube = Tube::new_from_unix_seqpacket(socket);
3817 if let Err(e) = process_vhost_user_control_request(tube, &disk_host_tubes) {
3818 error!("failed to process control request: {:#}", e);
3819 }
3820 }
3821 Err(e) => {
3822 error!("failed to establish connection: {}", e);
3823 }
3824 }
3825 }
3826 }
3827
3828 pub fn start_devices(opts: DevicesCommand) -> anyhow::Result<()> {
3829 if let Some(async_executor) = opts.async_executor {
3830 Executor::set_default_executor_kind(async_executor)
3831 .context("Failed to set the default async executor")?;
3832 }
3833
3834 struct DeviceJailInfo {
3835 // Unique name for the device, in the form `foomatic-0`.
3836 name: String,
3837 _drop_resources: Option<Box<dyn std::any::Any>>,
3838 }
3839
3840 fn add_device<T: VirtioDeviceBuilder>(
3841 i: usize,
3842 device_params: T,
3843 vhost: &str,
3844 jail_config: &Option<JailConfig>,
3845 devices_jails: &mut BTreeMap<libc::pid_t, DeviceJailInfo>,
3846 ) -> anyhow::Result<()> {
3847 let name = format!("{}-{}", T::NAME, i);
3848
3849 let (pid, _drop_resources) =
3850 jail_and_start_vu_device::<T>(jail_config, device_params, vhost, &name)?;
3851
3852 devices_jails.insert(
3853 pid,
3854 DeviceJailInfo {
3855 name,
3856 _drop_resources,
3857 },
3858 );
3859
3860 Ok(())
3861 }
3862
3863 let mut devices_jails: BTreeMap<libc::pid_t, DeviceJailInfo> = BTreeMap::new();
3864
3865 let jail = if opts.disable_sandbox {
3866 None
3867 } else {
3868 Some(opts.jail)
3869 };
3870
3871 // Create control server socket
3872 let control_server_socket = opts.control_socket.map(|path| {
3873 UnlinkUnixSeqpacketListener(
3874 UnixSeqpacketListener::bind(path).expect("Could not bind socket"),
3875 )
3876 });
3877
3878 // Create serial devices.
3879 for (i, params) in opts.serial.iter().enumerate() {
3880 let serial_config = ¶ms.device;
3881 add_device(i, serial_config, ¶ms.vhost, &jail, &mut devices_jails)?;
3882 }
3883
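    // Host-side ends of the block device control tubes; the vhost-user control server uses
    // these to forward `DiskCommand` requests to the corresponding device.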
3884 let mut disk_host_tubes = Vec::new();
3885 let control_socket_exists = control_server_socket.is_some();
3886 // Create block devices.
3887 for (i, params) in opts.block.iter().enumerate() {
3888 let tube = if control_socket_exists {
3889 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
3890 disk_host_tubes.push(host_tube);
3891 Some(device_tube)
3892 } else {
3893 None
3894 };
3895 let disk_config = DiskConfig::new(¶ms.device, tube);
3896 add_device(i, disk_config, ¶ms.vhost, &jail, &mut devices_jails)?;
3897 }
3898
3899 // Create vsock devices.
3900 for (i, params) in opts.vsock.iter().enumerate() {
3901 add_device(i, ¶ms.device, ¶ms.vhost, &jail, &mut devices_jails)?;
3902 }
3903
3904 let ex = Executor::new()?;
3905 if let Some(control_server_socket) = control_server_socket {
3906 // Start the control server in the parent process.
3907 ex.spawn_blocking(move || {
3908 start_vhost_user_control_server(control_server_socket, disk_host_tubes)
3909 })
3910 .detach();
3911 }
3912
3913 // Now wait for all device processes to return.
3914 while !devices_jails.is_empty() {
3915 match base::platform::wait_for_pid(-1, 0) {
3916 Err(e) => panic!("error waiting for child process to complete: {:#}", e),
3917 Ok((Some(pid), wait_status)) => match devices_jails.remove_entry(&pid) {
3918 Some((_, info)) => {
3919 if let Some(status) = wait_status.code() {
3920 info!(
3921 "process for device {} (PID {}) exited with code {}",
3922 &info.name, pid, status
3923 );
3924 } else if let Some(signal) = wait_status.signal() {
3925 warn!(
3926 "process for device {} (PID {}) has been killed by signal {:?}",
3927 &info.name, pid, signal,
3928 );
3929 }
3930 }
3931 None => error!("pid {} is not one of our device processes", pid),
3932 },
3933             // `wait_for_pid` will necessarily return a PID because we asked it to wait for one to
3934             // complete.
3935 Ok((None, _)) => unreachable!(),
3936 }
3937 }
3938
3939 info!("all device processes have exited");
3940
3941 Ok(())
3942 }
3943
3944 /// Setup crash reporting for a process. Each process MUST provide a unique `product_type` to avoid
3945 /// making crash reports incomprehensible.
3946 #[cfg(feature = "crash-report")]
3947 pub fn setup_emulator_crash_reporting(_cfg: &Config) -> anyhow::Result<String> {
3948 crash_report::setup_crash_reporting(crash_report::CrashReportAttributes {
3949 product_type: "emulator".to_owned(),
3950 pipe_name: None,
3951 report_uuid: None,
3952 product_name: None,
3953 product_version: None,
3954 })
3955 }
3956
3957 #[cfg(test)]
3958 mod tests {
3959 use std::path::PathBuf;
3960
3961 use super::*;
3962
3963 // Create a file-backed mapping parameters struct with the given `address` and `size` and other
3964 // parameters set to default values.
3965     fn test_file_backed_mapping(address: u64, size: u64) -> FileBackedMappingParameters {
3966 FileBackedMappingParameters {
3967 address,
3968 size,
3969 path: PathBuf::new(),
3970 offset: 0,
3971 writable: false,
3972 sync: false,
3973 align: false,
3974 }
3975 }
3976
3977 #[test]
3978     fn guest_mem_file_backed_mappings_overlap() {
3979 // Base case: no file mappings; output layout should be identical.
3980 assert_eq!(
3981 punch_holes_in_guest_mem_layout_for_mappings(
3982 vec![
3983 (GuestAddress(0), 0xD000_0000, Default::default()),
3984 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
3985 ],
3986 &[]
3987 ),
3988 vec![
3989 (GuestAddress(0), 0xD000_0000, Default::default()),
3990 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
3991 ]
3992 );
3993
3994 // File mapping that does not overlap guest memory.
3995 assert_eq!(
3996 punch_holes_in_guest_mem_layout_for_mappings(
3997 vec![
3998 (GuestAddress(0), 0xD000_0000, Default::default()),
3999 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4000 ],
4001 &[test_file_backed_mapping(0xD000_0000, 0x1000)]
4002 ),
4003 vec![
4004 (GuestAddress(0), 0xD000_0000, Default::default()),
4005 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4006 ]
4007 );
4008
4009 // File mapping at the start of the low address space region.
4010 assert_eq!(
4011 punch_holes_in_guest_mem_layout_for_mappings(
4012 vec![
4013 (GuestAddress(0), 0xD000_0000, Default::default()),
4014 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4015 ],
4016 &[test_file_backed_mapping(0, 0x2000)]
4017 ),
4018 vec![
4019 (
4020 GuestAddress(0x2000),
4021 0xD000_0000 - 0x2000,
4022 Default::default()
4023 ),
4024 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4025 ]
4026 );
4027
4028 // File mapping at the end of the low address space region.
4029 assert_eq!(
4030 punch_holes_in_guest_mem_layout_for_mappings(
4031 vec![
4032 (GuestAddress(0), 0xD000_0000, Default::default()),
4033 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4034 ],
4035 &[test_file_backed_mapping(0xD000_0000 - 0x2000, 0x2000)]
4036 ),
4037 vec![
4038 (GuestAddress(0), 0xD000_0000 - 0x2000, Default::default()),
4039 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4040 ]
4041 );
4042
4043 // File mapping fully contained within the middle of the low address space region.
4044 assert_eq!(
4045 punch_holes_in_guest_mem_layout_for_mappings(
4046 vec![
4047 (GuestAddress(0), 0xD000_0000, Default::default()),
4048 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4049 ],
4050 &[test_file_backed_mapping(0x1000, 0x2000)]
4051 ),
4052 vec![
4053 (GuestAddress(0), 0x1000, Default::default()),
4054 (
4055 GuestAddress(0x3000),
4056 0xD000_0000 - 0x3000,
4057 Default::default()
4058 ),
4059 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4060 ]
4061 );
4062
4063 // File mapping at the start of the high address space region.
4064 assert_eq!(
4065 punch_holes_in_guest_mem_layout_for_mappings(
4066 vec![
4067 (GuestAddress(0), 0xD000_0000, Default::default()),
4068 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4069 ],
4070 &[test_file_backed_mapping(0x1_0000_0000, 0x2000)]
4071 ),
4072 vec![
4073 (GuestAddress(0), 0xD000_0000, Default::default()),
4074 (
4075 GuestAddress(0x1_0000_2000),
4076 0x8_0000 - 0x2000,
4077 Default::default()
4078 ),
4079 ]
4080 );
4081
4082 // File mapping at the end of the high address space region.
4083 assert_eq!(
4084 punch_holes_in_guest_mem_layout_for_mappings(
4085 vec![
4086 (GuestAddress(0), 0xD000_0000, Default::default()),
4087 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4088 ],
4089 &[test_file_backed_mapping(0x1_0008_0000 - 0x2000, 0x2000)]
4090 ),
4091 vec![
4092 (GuestAddress(0), 0xD000_0000, Default::default()),
4093 (
4094 GuestAddress(0x1_0000_0000),
4095 0x8_0000 - 0x2000,
4096 Default::default()
4097 ),
4098 ]
4099 );
4100
4101 // File mapping fully contained within the middle of the high address space region.
4102 assert_eq!(
4103 punch_holes_in_guest_mem_layout_for_mappings(
4104 vec![
4105 (GuestAddress(0), 0xD000_0000, Default::default()),
4106 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4107 ],
4108 &[test_file_backed_mapping(0x1_0000_1000, 0x2000)]
4109 ),
4110 vec![
4111 (GuestAddress(0), 0xD000_0000, Default::default()),
4112 (GuestAddress(0x1_0000_0000), 0x1000, Default::default()),
4113 (
4114 GuestAddress(0x1_0000_3000),
4115 0x8_0000 - 0x3000,
4116 Default::default()
4117 ),
4118 ]
4119 );
4120
4121 // File mapping overlapping two guest memory regions.
4122 assert_eq!(
4123 punch_holes_in_guest_mem_layout_for_mappings(
4124 vec![
4125 (GuestAddress(0), 0xD000_0000, Default::default()),
4126 (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
4127 ],
4128 &[test_file_backed_mapping(0xA000_0000, 0x60002000)]
4129 ),
4130 vec![
4131 (GuestAddress(0), 0xA000_0000, Default::default()),
4132 (
4133 GuestAddress(0x1_0000_2000),
4134 0x8_0000 - 0x2000,
4135 Default::default()
4136 ),
4137 ]
4138 );
4139 }
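
    // Illustrative sketch, not part of the original test suite: it exercises the index
    // bookkeeping that `remove_hungup_and_drained_tubes` relies on, namely that removing
    // indices in descending order with `swap_remove` keeps the remaining (smaller) indices
    // valid. The tube names and indices below are placeholders.
    #[test]
    fn swap_remove_in_reverse_order_keeps_indices_valid() {
        let mut tubes = vec!["a", "b", "c", "d", "e"];
        // Duplicate indices are tolerated, mirroring the dedup in the real code.
        let mut to_remove = vec![1, 3, 3];

        // Sort in reverse so the highest indices are removed first.
        to_remove.sort_unstable_by_key(|&k| std::cmp::Reverse(k));
        to_remove.dedup();

        for index in to_remove {
            tubes.swap_remove(index);
        }

        // "b" (index 1) and "d" (index 3) are gone; everything else survives.
        assert_eq!(tubes.len(), 3);
        for survivor in ["a", "c", "e"] {
            assert!(tubes.contains(&survivor));
        }
    }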
4140 }
4141