• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// TODO(b:240716507): There is a huge chunk of code that depends on haxm, whpx, or gvm being
// enabled but isn't marked as such. Remove this allow once that is fixed.
7 #![allow(dead_code, unused_imports, unused_variables, unreachable_code)]
8 
9 pub(crate) mod irq_wait;
10 pub(crate) mod main;
11 #[cfg(not(feature = "crash-report"))]
12 mod panic_hook;
13 
14 mod generic;
15 use generic as product;
16 pub(crate) mod run_vcpu;
17 
18 #[cfg(feature = "whpx")]
19 use std::arch::x86_64::__cpuid;
20 #[cfg(feature = "whpx")]
21 use std::arch::x86_64::__cpuid_count;
22 #[cfg(feature = "gpu")]
23 use std::collections::BTreeMap;
24 use std::collections::HashMap;
25 use std::fs::File;
26 use std::fs::OpenOptions;
27 use std::io::stdin;
28 use std::iter;
29 use std::mem;
30 use std::os::windows::fs::OpenOptionsExt;
31 use std::path::PathBuf;
32 use std::sync::Arc;
33 
34 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
35 use aarch64::AArch64 as Arch;
36 use acpi_tables::sdt::SDT;
37 use anyhow::anyhow;
38 use anyhow::bail;
39 use anyhow::Context;
40 use anyhow::Result;
41 use arch::LinuxArch;
42 use arch::RunnableLinuxVm;
43 use arch::VirtioDeviceStub;
44 use arch::VmComponents;
45 use arch::VmImage;
46 use base::enable_high_res_timers;
47 use base::error;
48 use base::info;
49 use base::open_file;
50 use base::warn;
51 #[cfg(feature = "gpu")]
52 use base::BlockingMode;
53 use base::Event;
54 use base::EventToken;
55 #[cfg(feature = "gpu")]
56 use base::FramingMode;
57 use base::FromRawDescriptor;
58 use base::ProtoTube;
59 use base::RawDescriptor;
60 use base::ReadNotifier;
61 use base::RecvTube;
62 use base::SendTube;
63 #[cfg(feature = "gpu")]
64 use base::StreamChannel;
65 use base::Terminal;
66 use base::TriggeredEvent;
67 use base::Tube;
68 use base::TubeError;
69 use base::VmEventType;
70 use base::WaitContext;
71 use broker_ipc::common_child_setup;
72 use broker_ipc::CommonChildStartupArgs;
73 use crosvm_cli::sys::windows::exit::Exit;
74 use crosvm_cli::sys::windows::exit::ExitContext;
75 use crosvm_cli::sys::windows::exit::ExitContextAnyhow;
76 use devices::create_devices_worker_thread;
77 use devices::serial_device::SerialHardware;
78 use devices::serial_device::SerialParameters;
79 use devices::tsc::get_tsc_sync_mitigations;
80 use devices::tsc::standard_deviation;
81 use devices::tsc::TscSyncMitigations;
82 use devices::virtio;
83 use devices::virtio::block::block::DiskOption;
84 #[cfg(feature = "audio")]
85 use devices::virtio::snd::common_backend::VirtioSnd;
86 #[cfg(feature = "audio")]
87 use devices::virtio::snd::parameters::Parameters as SndParameters;
88 #[cfg(feature = "gpu")]
89 use devices::virtio::vhost::user::device::gpu::sys::windows::GpuVmmConfig;
90 #[cfg(feature = "gpu")]
91 use devices::virtio::vhost::user::gpu::sys::windows::product::GpuBackendConfig as GpuBackendConfigProduct;
92 #[cfg(feature = "audio")]
93 use devices::virtio::vhost::user::snd::sys::windows::product::SndBackendConfig as SndBackendConfigProduct;
94 use devices::virtio::BalloonFeatures;
95 #[cfg(feature = "balloon")]
96 use devices::virtio::BalloonMode;
97 use devices::virtio::Console;
98 #[cfg(feature = "gpu")]
99 use devices::virtio::GpuParameters;
100 #[cfg(feature = "audio")]
101 use devices::Ac97Dev;
102 use devices::BusDeviceObj;
103 #[cfg(feature = "gvm")]
104 use devices::GvmIrqChip;
105 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
106 use devices::IrqChip;
107 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
108 use devices::IrqChipAArch64 as IrqChipArch;
109 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
110 use devices::IrqChipX86_64 as IrqChipArch;
111 use devices::UserspaceIrqChip;
112 use devices::VirtioPciDevice;
113 #[cfg(feature = "whpx")]
114 use devices::WhpxSplitIrqChip;
115 #[cfg(feature = "gpu")]
116 use gpu_display::EventDevice;
117 #[cfg(feature = "gvm")]
118 use hypervisor::gvm::Gvm;
119 #[cfg(feature = "gvm")]
120 use hypervisor::gvm::GvmVcpu;
121 #[cfg(feature = "gvm")]
122 use hypervisor::gvm::GvmVersion;
123 #[cfg(feature = "gvm")]
124 use hypervisor::gvm::GvmVm;
125 #[cfg(feature = "haxm")]
126 use hypervisor::haxm::get_use_ghaxm;
127 #[cfg(feature = "haxm")]
128 use hypervisor::haxm::set_use_ghaxm;
129 #[cfg(feature = "haxm")]
130 use hypervisor::haxm::Haxm;
131 #[cfg(feature = "haxm")]
132 use hypervisor::haxm::HaxmVcpu;
133 #[cfg(feature = "haxm")]
134 use hypervisor::haxm::HaxmVm;
135 #[cfg(feature = "whpx")]
136 use hypervisor::whpx::Whpx;
137 #[cfg(feature = "whpx")]
138 use hypervisor::whpx::WhpxFeature;
139 #[cfg(feature = "whpx")]
140 use hypervisor::whpx::WhpxVcpu;
141 #[cfg(feature = "whpx")]
142 use hypervisor::whpx::WhpxVm;
143 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
144 use hypervisor::CpuConfigX86_64;
145 use hypervisor::Hypervisor;
146 #[cfg(feature = "whpx")]
147 use hypervisor::HypervisorCap;
148 #[cfg(feature = "whpx")]
149 use hypervisor::HypervisorX86_64;
150 use hypervisor::ProtectionType;
151 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
152 use hypervisor::VcpuAArch64 as VcpuArch;
153 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
154 use hypervisor::VcpuX86_64 as VcpuArch;
155 #[cfg(any(feature = "gvm", feature = "whpx"))]
156 use hypervisor::Vm;
157 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
158 use hypervisor::VmAArch64 as VmArch;
159 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
160 use hypervisor::VmX86_64 as VmArch;
161 use irq_wait::IrqWaitWorker;
162 use jail::FakeMinijailStub as Minijail;
163 #[cfg(not(feature = "crash-report"))]
164 pub(crate) use panic_hook::set_panic_hook;
165 use product::create_snd_mute_tube_pair;
166 #[cfg(any(feature = "haxm", feature = "gvm", feature = "whpx"))]
167 use product::create_snd_state_tube;
168 use product::handle_pvclock_request;
169 use product::merge_session_invariants;
170 use product::run_ime_thread;
171 use product::set_package_name;
172 pub(crate) use product::setup_metrics_reporting;
173 use product::start_service_ipc_listener;
174 use product::RunControlArgs;
175 use product::ServiceVmState;
176 use product::Token;
177 use resources::SystemAllocator;
178 use run_vcpu::run_all_vcpus;
179 use run_vcpu::VcpuRunMode;
180 use rutabaga_gfx::RutabagaGralloc;
181 use sync::Mutex;
182 use tube_transporter::TubeToken;
183 use tube_transporter::TubeTransporterReader;
184 use vm_control::BalloonControlCommand;
185 use vm_control::DeviceControlCommand;
186 use vm_control::VmMemoryRequest;
187 use vm_control::VmRunMode;
188 use vm_memory::GuestAddress;
189 use vm_memory::GuestMemory;
190 use win_util::ProcessType;
191 #[cfg(feature = "whpx")]
192 use x86_64::cpuid::adjust_cpuid;
193 #[cfg(feature = "whpx")]
194 use x86_64::cpuid::CpuIdContext;
195 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "haxm"))]
196 use x86_64::get_cpu_manufacturer;
197 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "haxm"))]
198 use x86_64::CpuManufacturer;
199 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
200 use x86_64::X8664arch as Arch;
201 
202 use crate::crosvm::config::Config;
203 use crate::crosvm::config::Executable;
204 #[cfg(feature = "gpu")]
205 use crate::crosvm::config::TouchDeviceOption;
206 use crate::crosvm::sys::config::HypervisorKind;
207 #[cfg(any(feature = "gvm", feature = "whpx"))]
208 use crate::crosvm::sys::config::IrqChipKind;
209 use crate::crosvm::sys::windows::broker::BrokerTubes;
210 #[cfg(feature = "stats")]
211 use crate::crosvm::sys::windows::stats::StatisticsCollector;
212 #[cfg(feature = "gpu")]
213 pub(crate) use crate::sys::windows::product::get_gpu_product_configs;
214 #[cfg(feature = "audio")]
215 pub(crate) use crate::sys::windows::product::get_snd_product_configs;
216 use crate::sys::windows::product::log_descriptor;
217 #[cfg(feature = "audio")]
218 pub(crate) use crate::sys::windows::product::num_input_sound_devices;
219 #[cfg(feature = "audio")]
220 pub(crate) use crate::sys::windows::product::num_input_sound_streams;
221 use crate::sys::windows::product::spawn_anti_tamper_thread;
222 use crate::sys::windows::product::MetricEventType;
223 
/// Default vsock guest CID used when the config does not provide one.
/// This is the lowest non-reserved CID value (see `create_vsock_device`).
const DEFAULT_GUEST_CID: u64 = 3;
225 
226 enum TaggedControlTube {
227     #[allow(dead_code)]
228     Vm(Tube),
229     VmMemory(Tube),
230     Product(product::TaggedControlTube),
231 }
232 
/// Final state of the VM when the run loop ends, reported to the caller.
pub enum ExitState {
    /// The guest requested a reset/reboot.
    Reset,
    /// The guest requested a normal shutdown.
    Stop,
    /// A vcpu crashed.
    Crash,
    #[allow(dead_code)]
    GuestPanic,
    /// A vcpu stall (watchdog) was detected.
    WatchdogReset,
}
241 
242 type DeviceResult<T = VirtioDeviceStub> = Result<T>;
243 
create_vhost_user_block_device(cfg: &Config, disk_device_tube: Tube) -> DeviceResult244 fn create_vhost_user_block_device(cfg: &Config, disk_device_tube: Tube) -> DeviceResult {
245     let features = virtio::base_features(cfg.protection_type);
246     let dev =
247         virtio::vhost::user::vmm::VhostUserVirtioDevice::new_block(features, disk_device_tube)
248             .exit_context(
249                 Exit::VhostUserBlockDeviceNew,
250                 "failed to set up vhost-user block device",
251             )?;
252 
253     Ok(VirtioDeviceStub {
254         dev: Box::new(dev),
255         jail: None,
256     })
257 }
258 
create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult259 fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
260     let features = virtio::base_features(cfg.protection_type);
261     let dev = virtio::BlockAsync::new(
262         features,
263         disk.open()?,
264         disk.read_only,
265         disk.sparse,
266         disk.block_size,
267         false,
268         disk.id,
269         Some(disk_device_tube),
270         None,
271         None,
272         None,
273     )
274     .exit_context(Exit::BlockDeviceNew, "failed to create block device")?;
275 
276     Ok(VirtioDeviceStub {
277         dev: Box::new(dev),
278         jail: None,
279     })
280 }
281 
/// Creates the VMM (frontend) half of a vhost-user GPU device that talks to
/// its backend over `vhost_user_tube`.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(base_features: u64, vhost_user_tube: Tube) -> DeviceResult {
    let device =
        virtio::vhost::user::vmm::VhostUserVirtioDevice::new_gpu(base_features, vhost_user_tube)
            .exit_context(
                Exit::VhostUserGpuDeviceNew,
                "failed to set up vhost-user gpu device",
            )?;
    Ok(VirtioDeviceStub {
        dev: Box::new(device),
        jail: None,
    })
}
296 
/// Creates an in-process virtio GPU device using the product-specific backend.
///
/// Only the first configured display parameter is used for the WinApi display
/// backend. Returns an error (rather than panicking, as the previous
/// `display_params[0]` indexing did) when no display parameters are present.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    gpu_parameters: &GpuParameters,
    vm_evt_wrtube: &SendTube,
    resource_bridges: Vec<Tube>,
    event_devices: Vec<EventDevice>,
    product_args: GpuBackendConfigProduct,
) -> DeviceResult {
    // Guard against an empty display list instead of indexing unchecked.
    let display_params = gpu_parameters
        .display_params
        .first()
        .context("at least one display parameter is required to create a GPU device")?;
    let display_backends = vec![virtio::DisplayBackend::WinApi(display_params.into())];
    let features = virtio::base_features(cfg.protection_type);
    let dev = product::create_gpu(
        vm_evt_wrtube,
        resource_bridges,
        display_backends,
        gpu_parameters,
        event_devices,
        features,
        product_args,
    )?;

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail: None,
    })
}
325 
/// Creates an in-process virtio sound device from `parameters`.
/// `_product_args` is currently unused by this (generic) product build.
#[cfg(feature = "audio")]
fn create_snd_device(
    cfg: &Config,
    parameters: SndParameters,
    _product_args: SndBackendConfigProduct,
) -> DeviceResult {
    let snd = VirtioSnd::new(virtio::base_features(cfg.protection_type), parameters)
        .exit_context(Exit::VirtioSoundDeviceNew, "failed to create snd device")?;
    Ok(VirtioDeviceStub {
        dev: Box::new(snd),
        jail: None,
    })
}
341 
/// Creates the VMM (frontend) half of a vhost-user sound device that talks to
/// its backend over `vhost_user_tube`.
#[cfg(feature = "audio")]
fn create_vhost_user_snd_device(base_features: u64, vhost_user_tube: Tube) -> DeviceResult {
    let device =
        virtio::vhost::user::vmm::VhostUserVirtioDevice::new_snd(base_features, vhost_user_tube)
            .exit_context(
                Exit::VhostUserSndDeviceNew,
                "failed to set up vhost-user snd device",
            )?;
    Ok(VirtioDeviceStub {
        dev: Box::new(device),
        jail: None,
    })
}
356 
/// Creates a virtio multi-touch input device with the size taken from
/// `multi_touch_spec`, fed by `event_pipe`. `idx` distinguishes multiple
/// touch devices.
#[cfg(feature = "gpu")]
fn create_multi_touch_device(
    cfg: &Config,
    multi_touch_spec: &TouchDeviceOption,
    event_pipe: StreamChannel,
    idx: u32,
) -> DeviceResult {
    let (width, height) = multi_touch_spec.get_size();
    let touch = virtio::new_multi_touch(
        idx,
        event_pipe,
        width,
        height,
        virtio::base_features(cfg.protection_type),
    )
    .exit_context(Exit::InputDeviceNew, "failed to set up input device")?;
    Ok(VirtioDeviceStub {
        dev: Box::new(touch),
        jail: None,
    })
}
378 
/// Creates a virtio mouse input device fed by `event_pipe`. `idx`
/// distinguishes multiple mouse devices.
#[cfg(feature = "gpu")]
fn create_mouse_device(cfg: &Config, event_pipe: StreamChannel, idx: u32) -> DeviceResult {
    let mouse = virtio::new_mouse(idx, event_pipe, virtio::base_features(cfg.protection_type))
        .exit_context(Exit::InputDeviceNew, "failed to set up input device")?;
    Ok(VirtioDeviceStub {
        dev: Box::new(mouse),
        jail: None,
    })
}
388 
/// Creates the VMM (frontend) half of a vhost-user net device that talks to
/// its backend over `net_device_tube`.
#[cfg(feature = "slirp")]
fn create_vhost_user_net_device(cfg: &Config, net_device_tube: Tube) -> DeviceResult {
    let base_features = virtio::base_features(cfg.protection_type);
    let net =
        virtio::vhost::user::vmm::VhostUserVirtioDevice::new_net(base_features, net_device_tube)
            .exit_context(
                Exit::VhostUserNetDeviceNew,
                "failed to set up vhost-user net device",
            )?;
    Ok(VirtioDeviceStub {
        dev: Box::new(net),
        jail: None,
    })
}
403 
create_rng_device(cfg: &Config) -> DeviceResult404 fn create_rng_device(cfg: &Config) -> DeviceResult {
405     let dev = virtio::Rng::new(virtio::base_features(cfg.protection_type))
406         .exit_context(Exit::RngDeviceNew, "failed to set up rng")?;
407 
408     Ok(VirtioDeviceStub {
409         dev: Box::new(dev),
410         jail: None,
411     })
412 }
413 
create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult414 fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
415     let mut keep_rds = Vec::new();
416     let evt = Event::new().exit_context(Exit::CreateEvent, "failed to create event")?;
417     let dev = param
418         .create_serial_device::<Console>(cfg.protection_type, &evt, &mut keep_rds)
419         .exit_context(Exit::CreateConsole, "failed to create console device")?;
420 
421     Ok(VirtioDeviceStub {
422         dev: Box::new(dev),
423         jail: None,
424     })
425 }
426 
/// Creates a virtio balloon device.
///
/// The balloon runs in strict or relaxed mode depending on
/// `cfg.strict_balloon`, and advertises the page-reporting feature bit when
/// `cfg.balloon_page_reporting` is set.
#[cfg(feature = "balloon")]
fn create_balloon_device(
    cfg: &Config,
    balloon_device_tube: Tube,
    dynamic_mapping_device_tube: Tube,
    inflate_tube: Option<Tube>,
    init_balloon_size: u64,
) -> DeviceResult {
    let mode = if cfg.strict_balloon {
        BalloonMode::Strict
    } else {
        BalloonMode::Relaxed
    };
    // Feature bit for page reporting, set only when enabled in the config.
    let balloon_features =
        (cfg.balloon_page_reporting as u64) << BalloonFeatures::PageReporting as u64;
    let balloon = virtio::Balloon::new(
        virtio::base_features(cfg.protection_type),
        balloon_device_tube,
        None,
        dynamic_mapping_device_tube,
        inflate_tube,
        init_balloon_size,
        mode,
        balloon_features,
        None,
    )
    .exit_context(Exit::BalloonDeviceNew, "failed to create balloon")?;
    Ok(VirtioDeviceStub {
        dev: Box::new(balloon),
        jail: None,
    })
}
459 
create_vsock_device(cfg: &Config) -> DeviceResult460 fn create_vsock_device(cfg: &Config) -> DeviceResult {
461     // We only support a single guest, so we can confidently assign a default
462     // CID if one isn't provided. We choose the lowest non-reserved value.
463     let dev = virtio::vsock::Vsock::new(
464         cfg.vsock
465             .as_ref()
466             .map(|cfg| cfg.cid)
467             .unwrap_or(DEFAULT_GUEST_CID),
468         cfg.host_guid.clone(),
469         virtio::base_features(cfg.protection_type),
470     )
471     .exit_context(
472         Exit::UserspaceVsockDeviceNew,
473         "failed to create userspace vsock device",
474     )?;
475 
476     Ok(VirtioDeviceStub {
477         dev: Box::new(dev),
478         jail: None,
479     })
480 }
481 
create_virtio_devices( cfg: &mut Config, vm_evt_wrtube: &SendTube, #[allow(clippy::ptr_arg)] control_tubes: &mut Vec<TaggedControlTube>, disk_device_tubes: &mut Vec<Tube>, balloon_device_tube: Option<Tube>, pvclock_device_tube: Option<Tube>, dynamic_mapping_device_tube: Option<Tube>, inflate_tube: Option<Tube>, init_balloon_size: u64, tsc_frequency: u64, virtio_snd_state_device_tube: Option<Tube>, virtio_snd_control_device_tube: Option<Tube>, ) -> DeviceResult<Vec<VirtioDeviceStub>>482 fn create_virtio_devices(
483     cfg: &mut Config,
484     vm_evt_wrtube: &SendTube,
485     #[allow(clippy::ptr_arg)] control_tubes: &mut Vec<TaggedControlTube>,
486     disk_device_tubes: &mut Vec<Tube>,
487     balloon_device_tube: Option<Tube>,
488     pvclock_device_tube: Option<Tube>,
489     dynamic_mapping_device_tube: Option<Tube>,
490     inflate_tube: Option<Tube>,
491     init_balloon_size: u64,
492     tsc_frequency: u64,
493     virtio_snd_state_device_tube: Option<Tube>,
494     virtio_snd_control_device_tube: Option<Tube>,
495 ) -> DeviceResult<Vec<VirtioDeviceStub>> {
496     let mut devs = Vec::new();
497 
498     if cfg.block_vhost_user_tube.is_empty() {
499         // Disk devices must precede virtio-console devices or the kernel does not boot.
500         // TODO(b/171215421): figure out why this ordering is required and fix it.
501         for disk in &cfg.disks {
502             let disk_device_tube = disk_device_tubes.remove(0);
503             devs.push(create_block_device(cfg, disk, disk_device_tube)?);
504         }
505     } else {
506         info!("Starting up vhost user block backends...");
507         for _disk in &cfg.disks {
508             let disk_device_tube = cfg.block_vhost_user_tube.remove(0);
509             devs.push(create_vhost_user_block_device(cfg, disk_device_tube)?);
510         }
511     }
512 
513     for (_, param) in cfg
514         .serial_parameters
515         .iter()
516         .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
517     {
518         let dev = create_console_device(cfg, param)?;
519         devs.push(dev);
520     }
521 
522     #[cfg(feature = "audio")]
523     if product::virtio_sound_enabled() {
524         let snd_split_config = cfg
525             .snd_split_config
526             .as_mut()
527             .expect("snd_split_config must exist");
528         let snd_vmm_config = snd_split_config
529             .vmm_config
530             .as_mut()
531             .expect("snd_vmm_config must exist");
532         product::push_snd_control_tubes(control_tubes, snd_vmm_config);
533 
534         match snd_split_config.backend_config.take() {
535             None => {
536                 // No backend config present means the backend is running in another process.
537                 devs.push(create_vhost_user_snd_device(
538                     virtio::base_features(cfg.protection_type),
539                     snd_vmm_config
540                         .main_vhost_user_tube
541                         .take()
542                         .expect("Snd VMM vhost-user tube should be set"),
543                 )?);
544             }
545             Some(backend_config) => {
546                 // Backend config present, so initialize Snd in this process.
547                 devs.push(create_snd_device(
548                     cfg,
549                     backend_config.parameters,
550                     backend_config.product_config,
551                 )?);
552             }
553         }
554     }
555 
556     if let Some(tube) = pvclock_device_tube {
557         product::push_pvclock_device(cfg, &mut devs, tsc_frequency, tube);
558     }
559 
560     devs.push(create_rng_device(cfg)?);
561 
562     #[cfg(feature = "slirp")]
563     if let Some(net_vhost_user_tube) = cfg.net_vhost_user_tube.take() {
564         devs.push(create_vhost_user_net_device(cfg, net_vhost_user_tube)?);
565     }
566 
567     if let (Some(balloon_device_tube), Some(dynamic_mapping_device_tube)) =
568         (balloon_device_tube, dynamic_mapping_device_tube)
569     {
570         devs.push(create_balloon_device(
571             cfg,
572             balloon_device_tube,
573             dynamic_mapping_device_tube,
574             inflate_tube,
575             init_balloon_size,
576         )?);
577     }
578 
579     devs.push(create_vsock_device(cfg)?);
580 
581     #[cfg(feature = "gpu")]
582     if let Some(gpu_vmm_config) = cfg.gpu_vmm_config.take() {
583         devs.extend(create_virtio_gpu_and_input_devices(
584             cfg,
585             gpu_vmm_config,
586             control_tubes,
587         )?);
588     }
589 
590     Ok(devs)
591 }
592 
/// Creates the GPU device plus all of its associated input devices
/// (multi-touch, mouse, keyboard) from `gpu_vmm_config`.
///
/// The GPU itself is created either as a vhost-user frontend (when no backend
/// config is present, i.e. the backend runs in another process) or in-process.
#[cfg(feature = "gpu")]
fn create_virtio_gpu_and_input_devices(
    cfg: &mut Config,
    mut gpu_vmm_config: GpuVmmConfig,
    #[allow(clippy::ptr_arg)] control_tubes: &mut Vec<TaggedControlTube>,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devices = Vec::new();
    let resource_bridges = Vec::<Tube>::new();

    if !cfg.virtio_single_touch.is_empty() {
        unimplemented!("--single-touch is no longer supported. Use --multi-touch instead.");
    }

    product::push_gpu_control_tubes(control_tubes, &mut gpu_vmm_config);

    // Iterate event devices, create the VMM end.
    for (i, pipe) in gpu_vmm_config
        .input_event_multi_touch_pipes
        .drain(..)
        .enumerate()
    {
        devices.push(create_multi_touch_device(
            cfg,
            &cfg.virtio_multi_touch[i],
            pipe,
            i as u32,
        )?);
    }

    product::push_mouse_device(cfg, &mut gpu_vmm_config, &mut devices)?;

    for (i, pipe) in gpu_vmm_config.input_event_mouse_pipes.drain(..).enumerate() {
        devices.push(create_mouse_device(cfg, pipe, i as u32)?);
    }

    // A valid GPU VMM config is expected to supply at least one keyboard pipe.
    let keyboard_pipe = gpu_vmm_config
        .input_event_keyboard_pipes
        .pop()
        .expect("at least one keyboard should be in GPU VMM config");
    let keyboard = virtio::new_keyboard(
        /* idx= */ 0,
        keyboard_pipe,
        virtio::base_features(cfg.protection_type),
    )
    .exit_context(Exit::InputDeviceNew, "failed to set up input device")?;
    devices.push(VirtioDeviceStub {
        dev: Box::new(keyboard),
        jail: None,
    });

    match cfg.gpu_backend_config.take() {
        None => {
            // No backend config present means the backend is running in another process.
            let vhost_user_tube = gpu_vmm_config
                .main_vhost_user_tube
                .take()
                .expect("GPU VMM vhost-user tube should be set");
            devices.push(create_vhost_user_gpu_device(
                virtio::base_features(cfg.protection_type),
                vhost_user_tube,
            )?);
        }
        Some(backend_config) => {
            // Backend config present, so initialize GPU in this process.
            devices.push(create_gpu_device(
                cfg,
                &backend_config.params,
                &backend_config.exit_evt_wrtube,
                resource_bridges,
                backend_config.event_devices,
                backend_config.product_config,
            )?);
        }
    }

    Ok(devices)
}
670 
create_devices( cfg: &mut Config, mem: &GuestMemory, exit_evt_wrtube: &SendTube, irq_control_tubes: &mut Vec<Tube>, control_tubes: &mut Vec<TaggedControlTube>, disk_device_tubes: &mut Vec<Tube>, balloon_device_tube: Option<Tube>, pvclock_device_tube: Option<Tube>, dynamic_mapping_device_tube: Option<Tube>, inflate_tube: Option<Tube>, init_balloon_size: u64, #[allow(unused)] ac97_device_tubes: Vec<Tube>, tsc_frequency: u64, virtio_snd_state_device_tube: Option<Tube>, virtio_snd_control_device_tube: Option<Tube>, ) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>>671 fn create_devices(
672     cfg: &mut Config,
673     mem: &GuestMemory,
674     exit_evt_wrtube: &SendTube,
675     irq_control_tubes: &mut Vec<Tube>,
676     control_tubes: &mut Vec<TaggedControlTube>,
677     disk_device_tubes: &mut Vec<Tube>,
678     balloon_device_tube: Option<Tube>,
679     pvclock_device_tube: Option<Tube>,
680     dynamic_mapping_device_tube: Option<Tube>,
681     inflate_tube: Option<Tube>,
682     init_balloon_size: u64,
683     #[allow(unused)] ac97_device_tubes: Vec<Tube>,
684     tsc_frequency: u64,
685     virtio_snd_state_device_tube: Option<Tube>,
686     virtio_snd_control_device_tube: Option<Tube>,
687 ) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
688     let stubs = create_virtio_devices(
689         cfg,
690         exit_evt_wrtube,
691         control_tubes,
692         disk_device_tubes,
693         balloon_device_tube,
694         pvclock_device_tube,
695         dynamic_mapping_device_tube,
696         inflate_tube,
697         init_balloon_size,
698         tsc_frequency,
699         virtio_snd_state_device_tube,
700         virtio_snd_control_device_tube,
701     )?;
702 
703     let mut pci_devices = Vec::new();
704 
705     for stub in stubs {
706         let (msi_host_tube, msi_device_tube) =
707             Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
708         irq_control_tubes.push(msi_host_tube);
709 
710         let shared_memory_tube = if stub.dev.get_shared_memory_region().is_some() {
711             let (host_tube, device_tube) =
712                 Tube::pair().context("failed to create VVU proxy tube")?;
713             control_tubes.push(TaggedControlTube::VmMemory(host_tube));
714             Some(device_tube)
715         } else {
716             None
717         };
718 
719         let (ioevent_host_tube, ioevent_device_tube) =
720             Tube::pair().context("failed to create ioevent tube")?;
721         control_tubes.push(TaggedControlTube::VmMemory(ioevent_host_tube));
722 
723         let dev = Box::new(
724             VirtioPciDevice::new(
725                 mem.clone(),
726                 stub.dev,
727                 msi_device_tube,
728                 cfg.disable_virtio_intx,
729                 shared_memory_tube,
730                 ioevent_device_tube,
731             )
732             .exit_context(Exit::VirtioPciDev, "failed to create virtio pci dev")?,
733         ) as Box<dyn BusDeviceObj>;
734         pci_devices.push((dev, stub.jail));
735     }
736 
737     #[cfg(feature = "audio")]
738     if !product::virtio_sound_enabled() {
739         if cfg.ac97_parameters.len() != ac97_device_tubes.len() {
740             panic!(
741                 "{} Ac97 device(s) will be made, but only {} Ac97 device tubes are present.",
742                 cfg.ac97_parameters.len(),
743                 ac97_device_tubes.len()
744             );
745         }
746 
747         for (ac97_param, ac97_device_tube) in cfg
748             .ac97_parameters
749             .iter()
750             .zip(ac97_device_tubes.into_iter())
751         {
752             let dev = Ac97Dev::try_new(mem.clone(), ac97_param.clone(), ac97_device_tube)
753                 .exit_context(Exit::CreateAc97, "failed to create ac97 device")?;
754             pci_devices.push((Box::new(dev), None));
755         }
756     }
757 
758     Ok(pci_devices)
759 }
760 
/// Error type carrying a human-readable description of a pvclock failure.
// NOTE(review): appears related to `handle_pvclock_request`; confirm usage in
// the rest of this file before extending.
#[derive(Debug)]
struct PvClockError(String);
763 
/// Dispatches one readable `WaitContext` event for the main `run_control`
/// poll loop.
///
/// Returns `(break_poll, exit_state)`:
/// * `break_poll` — true when the caller should leave its poll loop.
/// * `exit_state` — when `Some`, the `ExitState` the VM should report.
///
/// Tubes that are found disconnected are not removed here; their indices are
/// appended to `vm_control_indices_to_remove` for the caller to process.
fn handle_readable_event<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
    event: &TriggeredEvent<Token>,
    vm_control_indices_to_remove: &mut Vec<usize>,
    service_vm_state: &mut ServiceVmState,
    ac97_host_tubes: &[Tube],
    ipc_main_loop_tube: Option<&Tube>,
    vm_evt_rdtube: &RecvTube,
    control_tubes: &[TaggedControlTube],
    guest_os: &mut RunnableLinuxVm<V, Vcpu>,
    sys_allocator_mutex: &Arc<Mutex<SystemAllocator>>,
    gralloc: &mut RutabagaGralloc,
    virtio_snd_host_mute_tube: &mut Option<Tube>,
    proto_main_loop_tube: Option<&ProtoTube>,
    anti_tamper_main_thread_tube: &Option<ProtoTube>,
    balloon_host_tube: &Option<Tube>,
    memory_size_mb: u64,
    vcpu_boxes: &Mutex<Vec<Box<dyn VcpuArch>>>,
    pvclock_host_tube: &Option<Tube>,
    run_mode_arc: &VcpuRunMode,
) -> Result<(bool, Option<ExitState>)> {
    match event.token {
        // A VM lifecycle event arrived from a vCPU/device; translate it into
        // the corresponding ExitState, if any.
        Token::VmEvent => match vm_evt_rdtube.recv::<VmEventType>() {
            Ok(vm_event) => {
                let exit_state = match vm_event {
                    VmEventType::Exit => {
                        info!("vcpu requested shutdown");
                        Some(ExitState::Stop)
                    }
                    VmEventType::Reset => {
                        info!("vcpu requested reset");
                        Some(ExitState::Reset)
                    }
                    VmEventType::Crash => {
                        info!("vcpu crashed");
                        Some(ExitState::Crash)
                    }
                    VmEventType::Panic(_) => {
                        error!("got pvpanic event. this event is not expected on Windows.");
                        None
                    }
                    VmEventType::WatchdogReset => {
                        info!("vcpu stall detected");
                        Some(ExitState::WatchdogReset)
                    }
                };
                // Break the poll loop only when the event mapped to an exit.
                return Ok((exit_state.is_some(), exit_state));
            }
            Err(e) => {
                warn!("failed to recv VmEvent: {}", e);
            }
        },
        // The broker asked us to shut down: stop polling, no exit-state change.
        Token::BrokerShutdown => {
            info!("main loop got broker shutdown event");
            return Ok((true, None));
        }
        #[allow(clippy::collapsible_match)]
        Token::VmControl { index } => {
            if let Some(tube) = control_tubes.get(index) {
                #[allow(clippy::single_match)]
                match tube {
                    // Memory-mapping requests from devices; executed against the
                    // VM under the system allocator lock.
                    TaggedControlTube::VmMemory(tube) => match tube.recv::<VmMemoryRequest>() {
                        Ok(request) => {
                            let response = request.execute(
                                &mut guest_os.vm,
                                &mut sys_allocator_mutex.lock(),
                                gralloc,
                                None,
                            );
                            if let Err(e) = tube.send(&response) {
                                error!("failed to send VmMemoryControlResponse: {}", e);
                            }
                        }
                        Err(e) => {
                            // A disconnected tube is scheduled for removal;
                            // other receive errors are only logged.
                            if let TubeError::Disconnected = e {
                                vm_control_indices_to_remove.push(index);
                            } else {
                                error!("failed to recv VmMemoryControlRequest: {}", e);
                            }
                        }
                    },
                    // Product-specific control messages are delegated wholesale.
                    TaggedControlTube::Product(product_tube) => {
                        product::handle_tagged_control_tube_event(
                            product_tube,
                            virtio_snd_host_mute_tube,
                            service_vm_state,
                            ipc_main_loop_tube,
                            ac97_host_tubes,
                        )
                    }
                    _ => (),
                    // TODO: handle vm_control messages.
                    /* TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
                        Ok(request) => {
                            let mut run_mode_opt = None;
                            let response = request.execute(
                                &mut run_mode_opt,
                                disk_host_tubes,
                            );
                            if let Err(e) = tube.send(&response) {
                                error!("failed to send VmResponse: {}", e);
                            }
                            if let Some(run_mode) = run_mode_opt {
                                info!("control tube changed run mode to {}", run_mode);
                                match run_mode {
                                    VmRunMode::Exiting => {
                                        break 'poll;
                                    }
                                }
                            }
                        }
                        Err(e) => {
                            if let TubeError::Disconnected = e {
                                vm_control_indices_to_remove.push(index);
                            } else {
                                error!("failed to recv VmRequest: {}", e);
                            }
                        }
                    }, */
                }
            }
        }
        // Any token not handled above belongs to product-specific plumbing.
        #[allow(unreachable_patterns)]
        _ => product::handle_received_token(
            &event.token,
            ac97_host_tubes,
            anti_tamper_main_thread_tube,
            balloon_host_tube,
            control_tubes,
            guest_os,
            ipc_main_loop_tube,
            memory_size_mb,
            proto_main_loop_tube,
            pvclock_host_tube,
            run_mode_arc,
            service_vm_state,
            vcpu_boxes,
            virtio_snd_host_mute_tube,
        ),
    };
    Ok((false, None))
}
905 
run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>( mut guest_os: RunnableLinuxVm<V, Vcpu>, sys_allocator: SystemAllocator, mut control_tubes: Vec<TaggedControlTube>, irq_control_tubes: Vec<Tube>, vm_evt_rdtube: RecvTube, vm_evt_wrtube: SendTube, broker_shutdown_evt: Option<Event>, balloon_host_tube: Option<Tube>, pvclock_host_tube: Option<Tube>, mut gralloc: RutabagaGralloc, #[cfg(feature = "stats")] stats: Option<Arc<Mutex<StatisticsCollector>>>, service_pipe_name: Option<String>, ac97_host_tubes: Vec<Tube>, memory_size_mb: u64, host_cpu_topology: bool, tsc_sync_mitigations: TscSyncMitigations, force_calibrated_tsc_leaf: bool, product_args: RunControlArgs, mut virtio_snd_host_mute_tube: Option<Tube>, restore_path: Option<PathBuf>, ) -> Result<ExitState>906 fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
907     mut guest_os: RunnableLinuxVm<V, Vcpu>,
908     sys_allocator: SystemAllocator,
909     mut control_tubes: Vec<TaggedControlTube>,
910     irq_control_tubes: Vec<Tube>,
911     vm_evt_rdtube: RecvTube,
912     vm_evt_wrtube: SendTube,
913     broker_shutdown_evt: Option<Event>,
914     balloon_host_tube: Option<Tube>,
915     pvclock_host_tube: Option<Tube>,
916     mut gralloc: RutabagaGralloc,
917     #[cfg(feature = "stats")] stats: Option<Arc<Mutex<StatisticsCollector>>>,
918     service_pipe_name: Option<String>,
919     ac97_host_tubes: Vec<Tube>,
920     memory_size_mb: u64,
921     host_cpu_topology: bool,
922     tsc_sync_mitigations: TscSyncMitigations,
923     force_calibrated_tsc_leaf: bool,
924     product_args: RunControlArgs,
925     mut virtio_snd_host_mute_tube: Option<Tube>,
926     restore_path: Option<PathBuf>,
927 ) -> Result<ExitState> {
928     let (ipc_main_loop_tube, proto_main_loop_tube, _service_ipc) =
929         start_service_ipc_listener(service_pipe_name)?;
930 
931     let mut service_vm_state = product::create_service_vm_state(memory_size_mb);
932 
933     let sys_allocator_mutex = Arc::new(Mutex::new(sys_allocator));
934 
935     let exit_evt = Event::new().exit_context(Exit::CreateEvent, "failed to create event")?;
936 
937     // Create a separate thread to wait on IRQ events. This is a natural division
938     // because IRQ interrupts have no dependencies on other events, and this lets
939     // us avoid approaching the Windows WaitForMultipleObjects 64-object limit.
940     let irq_join_handle = IrqWaitWorker::start(
941         exit_evt
942             .try_clone()
943             .exit_context(Exit::CloneEvent, "failed to clone event")?,
944         guest_os
945             .irq_chip
946             .try_box_clone()
947             .exit_context(Exit::CloneEvent, "failed to clone irq chip")?,
948         irq_control_tubes,
949         sys_allocator_mutex.clone(),
950     );
951 
952     let mut triggers = vec![(vm_evt_rdtube.get_read_notifier(), Token::VmEvent)];
953     product::push_triggers(&mut triggers, &ipc_main_loop_tube, &proto_main_loop_tube);
954     let wait_ctx = WaitContext::build_with(&triggers).exit_context(
955         Exit::WaitContextAdd,
956         "failed to add trigger to wait context",
957     )?;
958 
959     if let Some(evt) = broker_shutdown_evt.as_ref() {
960         wait_ctx.add(evt, Token::BrokerShutdown).exit_context(
961             Exit::WaitContextAdd,
962             "failed to add trigger to wait context",
963         )?;
964     }
965 
966     for (index, control_tube) in control_tubes.iter().enumerate() {
967         #[allow(clippy::single_match)]
968         match control_tube {
969             TaggedControlTube::VmMemory(tube) => {
970                 wait_ctx
971                     .add(tube.get_read_notifier(), Token::VmControl { index })
972                     .exit_context(
973                         Exit::WaitContextAdd,
974                         "failed to add trigger to wait context",
975                     )?;
976             }
977             TaggedControlTube::Product(product_tube) => wait_ctx
978                 .add(product_tube.get_read_notifier(), Token::VmControl { index })
979                 .exit_context(
980                     Exit::WaitContextAdd,
981                     "failed to add trigger to wait context",
982                 )?,
983             _ => (),
984         }
985     }
986 
987     let (device_ctrl_tube, device_ctrl_resp) = Tube::pair().context("failed to create tube")?;
988     guest_os.devices_thread = match create_devices_worker_thread(
989         guest_os.vm.get_memory().clone(),
990         guest_os.io_bus.clone(),
991         guest_os.mmio_bus.clone(),
992         device_ctrl_resp,
993     ) {
994         Ok(join_handle) => Some(join_handle),
995         Err(e) => {
996             return Err(anyhow!("Failed to start devices thread: {}", e));
997         }
998     };
999 
1000     let vcpus: Vec<Option<_>> = match guest_os.vcpus.take() {
1001         Some(vec) => vec.into_iter().map(|vcpu| Some(vcpu)).collect(),
1002         None => iter::repeat_with(|| None)
1003             .take(guest_os.vcpu_count)
1004             .collect(),
1005     };
1006 
1007     let anti_tamper_main_thread_tube = spawn_anti_tamper_thread(&wait_ctx);
1008 
1009     #[cfg(feature = "sandbox")]
1010     if sandbox::is_sandbox_target() {
1011         sandbox::TargetServices::get()
1012             .exit_context(Exit::SandboxError, "failed to create sandbox")?
1013             .expect("Could not create sandbox!")
1014             .lower_token();
1015     }
1016 
1017     let ime_thread = run_ime_thread(product_args, &exit_evt)?;
1018 
1019     let original_terminal_mode = stdin().set_raw_mode().ok();
1020 
1021     let vcpu_boxes: Arc<Mutex<Vec<Box<dyn VcpuArch>>>> = Arc::new(Mutex::new(Vec::new()));
1022     let run_mode_arc = Arc::new(VcpuRunMode::default());
1023     let vcpu_threads = run_all_vcpus(
1024         vcpus,
1025         vcpu_boxes.clone(),
1026         &guest_os,
1027         &exit_evt,
1028         &vm_evt_wrtube,
1029         &pvclock_host_tube,
1030         #[cfg(feature = "stats")]
1031         &stats,
1032         host_cpu_topology,
1033         run_mode_arc.clone(),
1034         tsc_sync_mitigations,
1035         force_calibrated_tsc_leaf,
1036     )?;
1037 
1038     // Restore VM (if applicable).
1039     if let Some(path) = restore_path {
1040         // TODO(b/273992211): Port the unix --restore code to Windows.
1041         todo!();
1042     }
1043 
1044     let mut exit_state = ExitState::Stop;
1045 
1046     'poll: loop {
1047         let events = {
1048             match wait_ctx.wait() {
1049                 Ok(v) => v,
1050                 Err(e) => {
1051                     error!("failed to wait: {}", e);
1052                     break;
1053                 }
1054             }
1055         };
1056 
1057         let mut vm_control_indices_to_remove = Vec::new();
1058         for event in events.iter().filter(|e| e.is_readable) {
1059             let (break_poll, state) = handle_readable_event(
1060                 event,
1061                 &mut vm_control_indices_to_remove,
1062                 &mut service_vm_state,
1063                 &ac97_host_tubes,
1064                 ipc_main_loop_tube.as_ref(),
1065                 &vm_evt_rdtube,
1066                 &control_tubes,
1067                 &mut guest_os,
1068                 &sys_allocator_mutex,
1069                 &mut gralloc,
1070                 &mut virtio_snd_host_mute_tube,
1071                 proto_main_loop_tube.as_ref(),
1072                 &anti_tamper_main_thread_tube,
1073                 &balloon_host_tube,
1074                 memory_size_mb,
1075                 vcpu_boxes.as_ref(),
1076                 &pvclock_host_tube,
1077                 run_mode_arc.as_ref(),
1078             )?;
1079             if let Some(state) = state {
1080                 exit_state = state;
1081             }
1082             if break_poll {
1083                 break 'poll;
1084             }
1085         }
1086         for event in events.iter().filter(|e| e.is_hungup) {
1087             match event.token {
1088                 Token::VmEvent | Token::BrokerShutdown => {}
1089                 #[allow(unused_variables)]
1090                 Token::VmControl { index } => {
1091                     // TODO: handle vm control messages as they get ported.
1092                     // It's possible more data is readable and buffered while the tube is hungup,
1093                     // so don't delete the tube from the poll context until we're sure all the
1094                     // data is read.
1095                     /*match control_tubes
1096                         .get(index)
1097                         .map(|s| s.as_ref().get_readable_bytes())
1098                     {
1099                         Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
1100                         Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
1101                         _ => {}
1102                     }*/
1103                 }
1104                 #[allow(unreachable_patterns)]
1105                 _ => product::handle_hungup_event(&event.token),
1106             }
1107         }
1108 
1109         // Sort in reverse so the highest indexes are removed first. This removal algorithm
1110         // preserved correct indexes as each element is removed.
1111         //vm_control_indices_to_remove.sort_unstable_by(|a, b| b.cmp(a));
1112         vm_control_indices_to_remove.dedup();
1113         for index in vm_control_indices_to_remove {
1114             control_tubes.swap_remove(index);
1115             /*if let Some(tube) = control_tubes.get(index) {
1116                 wait_ctx
1117                     .modify(
1118                         tube, Token::VmControl { index },
1119                         EventType::Read
1120                     )
1121                     .exit_context(Exit::WaitContextAdd, "failed to add trigger to wait context")?;
1122             }*/
1123         }
1124     }
1125 
1126     info!("run_control poll loop completed, forcing vCPUs to exit...");
1127 
1128     // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
1129     run_mode_arc.set_and_notify(VmRunMode::Exiting);
1130 
1131     // Force all vcpus to exit from the hypervisor
1132     for vcpu in vcpu_boxes.lock().iter() {
1133         vcpu.set_immediate_exit(true);
1134     }
1135 
1136     let mut res = Ok(exit_state);
1137     guest_os.irq_chip.kick_halted_vcpus();
1138     let _ = exit_evt.signal();
1139     // Ensure any child threads have ended by sending the Exit vm event (possibly again) to ensure
1140     // their run loops are aborted.
1141     let _ = vm_evt_wrtube.send::<VmEventType>(&VmEventType::Exit);
1142     for (i, thread) in vcpu_threads.into_iter().enumerate() {
1143         // wait till all the threads exit, so that guest_os.vm arc memory count is down to 1.
1144         // otherwise, we will hit a memory leak if we force kill the thread with terminate.
1145         match thread.join() {
1146             Ok(Err(e)) => {
1147                 error!("vcpu thread {} exited with an error: {}", i, e);
1148                 res = Err(e);
1149             }
1150             Ok(_) => {}
1151             Err(e) => error!("vcpu thread {} panicked: {:?}", i, e),
1152         }
1153     }
1154 
1155     info!("vCPU threads have exited.");
1156 
1157     if let Some(ime) = ime_thread {
1158         match ime.join() {
1159             Ok(Err(e)) => {
1160                 error!("ime thread exited with an error: {}", e);
1161                 if res.is_ok() {
1162                     // Prioritize past errors, but return this error if it is unique, otherwise just
1163                     // log it.
1164                     res = Err(e)
1165                 }
1166             }
1167             Ok(_) => {}
1168             Err(e) => error!("ime thread panicked: {:?}", e),
1169         }
1170     }
1171     info!("IME thread has exited.");
1172 
1173     // This cancels all the outstanding and any future blocking operations.
1174     // TODO(b/196911556): Shutdown executor for cleaner shutdown. Given we are using global, for a
1175     // cleaner shutdown we have to call disarm so that all the incoming requests are run and are
1176     // cancelled. If we call shutdown all blocking threads will go away and incoming operations
1177     // won't be scheduled to run and will be dropped leading to panic. I think ideal place to call
1178     // shutdown is when we drop non-global executor.
1179     cros_async::unblock_disarm();
1180     info!("blocking async pool has shut down.");
1181 
1182     let _ = irq_join_handle.join();
1183     info!("IrqWaitWorker has shut down.");
1184 
1185     #[cfg(feature = "stats")]
1186     if let Some(stats) = stats {
1187         println!("Statistics Collected:\n{}", stats.lock());
1188         println!("Statistics JSON:\n{}", stats.lock().json());
1189     }
1190 
1191     if let Some(mode) = original_terminal_mode {
1192         if let Err(e) = stdin().restore_mode(mode) {
1193             warn!("failed to restore terminal mode: {}", e);
1194         }
1195     }
1196 
1197     // Explicitly drop the VM structure here to allow the devices to clean up before the
1198     // control tubes are closed when this function exits.
1199     mem::drop(guest_os);
1200 
1201     info!("guest_os dropped, run_control is done.");
1202 
1203     res
1204 }
1205 
/// Minimum GVM driver version this build supports; older drivers are rejected
/// by `create_gvm_vm`.
#[cfg(feature = "gvm")]
const GVM_MINIMUM_VERSION: GvmVersion = GvmVersion {
    major: 1,
    minor: 4,
    patch: 1,
};
1212 
/// Creates a GVM-backed VM, rejecting GVM drivers older than
/// `GVM_MINIMUM_VERSION`.
#[cfg(feature = "gvm")]
fn create_gvm_vm(gvm: Gvm, mem: GuestMemory) -> Result<GvmVm> {
    // Query the driver version first; an unreadable version is treated the
    // same as a too-old one.
    let version = match gvm.get_full_version() {
        Ok(version) => version,
        Err(e) => {
            error!("unable to determine gvm version: {}", e);
            return Err(base::Error::new(libc::ENXIO).into());
        }
    };

    if version < GVM_MINIMUM_VERSION {
        error!(
            "GVM version {} is below minimum version {}",
            version, GVM_MINIMUM_VERSION
        );
        return Err(base::Error::new(libc::ENXIO).into());
    }
    info!("Using GVM version {}.", version);

    let vm = GvmVm::new(&gvm, mem)?;
    Ok(vm)
}
1235 
/// Creates a HAXM-backed VM, optionally registering a kernel log file when the
/// driver supports it.
#[cfg(feature = "haxm")]
fn create_haxm_vm(
    haxm: Haxm,
    mem: GuestMemory,
    kernel_log_file: &Option<String>,
) -> Result<HaxmVm> {
    let vm = HaxmVm::new(&haxm, mem)?;

    // No log file requested: nothing more to do.
    let path = match kernel_log_file {
        Some(path) => path,
        None => return Ok(vm),
    };

    use hypervisor::haxm::HAX_CAP_VM_LOG;
    if !vm.check_raw_capability(HAX_CAP_VM_LOG) {
        warn!(
            "kernel_log_file specified but this version of HAXM does not support kernel log \
              files"
        );
        return Ok(vm);
    }

    // An over-long path is logged and ignored; any other failure is fatal.
    if let Err(e) = vm.register_log_file(path) {
        match e.errno() {
            libc::E2BIG => {
                error!(
                    "kernel_log_file path is too long, kernel log file will not be written"
                );
            }
            _ => return Err(e.into()),
        }
    }

    Ok(vm)
}
1266 
/// Creates a WHPX-backed VM, pre-adjusting the hypervisor's supported CPUID
/// entries for crosvm before handing them to the new VM.
#[cfg(feature = "whpx")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn create_whpx_vm(
    whpx: Whpx,
    mem: GuestMemory,
    cpu_count: usize,
    no_smt: bool,
    apic_emulation: bool,
    force_calibrated_tsc_leaf: bool,
    vm_evt_wrtube: SendTube,
) -> Result<WhpxVm> {
    let cpu_config = CpuConfigX86_64::new(
        force_calibrated_tsc_leaf,
        false, /* host_cpu_topology */
        false, /* enable_hwp */
        false, /* enable_pnp_data */
        no_smt,
        false, /* itmt */
        None,  /* hybrid_type */
    );

    // Context for non-cpu-specific cpuid results.
    let cpuid_ctx = CpuIdContext::new(
        0,
        cpu_count,
        None,
        cpu_config,
        whpx.check_capability(HypervisorCap::CalibratedTscLeafRequired),
        __cpuid_count,
        __cpuid,
    );

    // Fetch every cpuid entry we should pre-set, then adjust each one for
    // crosvm.
    let mut cpuid = whpx.get_supported_cpuid()?;
    cpuid
        .cpu_id_entries
        .iter_mut()
        .for_each(|entry| adjust_cpuid(entry, &cpuid_ctx));

    let vm = WhpxVm::new(
        &whpx,
        cpu_count,
        mem,
        cpuid,
        apic_emulation,
        Some(vm_evt_wrtube),
    )
    .exit_context(Exit::WhpxSetupError, "failed to create WHPX vm")?;

    Ok(vm)
}
1319 
/// Builds a `GvmIrqChip` bound to a clone of the given VM handle.
#[cfg(feature = "gvm")]
fn create_gvm_irq_chip(vm: &GvmVm, vcpu_count: usize) -> base::Result<GvmIrqChip> {
    info!("Creating GVM irqchip");
    let vm_clone = vm.try_clone()?;
    let chip = GvmIrqChip::new(vm_clone, vcpu_count)?;
    Ok(chip)
}
1326 
/// Builds a WHPX split irqchip bound to a clone of the given VM handle.
#[cfg(feature = "whpx")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn create_whpx_split_irq_chip(
    vm: &WhpxVm,
    ioapic_device_tube: Tube,
) -> base::Result<WhpxSplitIrqChip> {
    info!("Creating WHPX split irqchip");
    let vm_clone = vm.try_clone()?;
    // `None` selects the default number of IOAPIC pins.
    WhpxSplitIrqChip::new(vm_clone, ioapic_device_tube, None)
}
1340 
create_userspace_irq_chip<Vm, Vcpu>( vcpu_count: usize, ioapic_device_tube: Tube, ) -> base::Result<UserspaceIrqChip<Vcpu>> where Vm: VmArch + 'static, Vcpu: VcpuArch + 'static,1341 fn create_userspace_irq_chip<Vm, Vcpu>(
1342     vcpu_count: usize,
1343     ioapic_device_tube: Tube,
1344 ) -> base::Result<UserspaceIrqChip<Vcpu>>
1345 where
1346     Vm: VmArch + 'static,
1347     Vcpu: VcpuArch + 'static,
1348 {
1349     info!("Creating userspace irqchip");
1350     let irq_chip =
1351         UserspaceIrqChip::new(vcpu_count, ioapic_device_tube, /*ioapic_pins:*/ None)?;
1352     Ok(irq_chip)
1353 }
1354 
get_default_hypervisor() -> Result<HypervisorKind>1355 pub fn get_default_hypervisor() -> Result<HypervisorKind> {
1356     // The ordering here matters from most preferable to the least.
1357     #[cfg(feature = "whpx")]
1358     match hypervisor::whpx::Whpx::is_enabled() {
1359         true => return Ok(HypervisorKind::Whpx),
1360         false => warn!("Whpx not enabled."),
1361     };
1362     #[cfg(feature = "haxm")]
1363     if get_cpu_manufacturer() == CpuManufacturer::Intel {
1364         // Make sure Haxm device can be opened before selecting it.
1365         match Haxm::new() {
1366             Ok(_) => return Ok(HypervisorKind::Ghaxm),
1367             Err(e) => warn!("Cannot initialize HAXM: {}", e),
1368         };
1369     }
1370     #[cfg(feature = "gvm")]
1371     // Make sure Gvm device can be opened before selecting it.
1372     match Gvm::new() {
1373         Ok(_) => return Ok(HypervisorKind::Gvm),
1374         Err(e) => warn!("Cannot initialize GVM: {}", e),
1375     };
1376     bail!("no hypervisor enabled!");
1377 }
1378 
setup_vm_components(cfg: &Config) -> Result<VmComponents>1379 fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
1380     let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1381         Some(
1382             File::open(initrd_path).with_exit_context(Exit::OpenInitrd, || {
1383                 format!("failed to open initrd {}", initrd_path.display())
1384             })?,
1385         )
1386     } else {
1387         None
1388     };
1389 
1390     let vm_image = match cfg.executable_path {
1391         Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1392             File::open(kernel_path).with_exit_context(Exit::OpenKernel, || {
1393                 format!("failed to open kernel image {}", kernel_path.display(),)
1394             })?,
1395         ),
1396         Some(Executable::Bios(ref bios_path)) => {
1397             VmImage::Bios(File::open(bios_path).with_exit_context(Exit::OpenBios, || {
1398                 format!("failed to open bios {}", bios_path.display())
1399             })?)
1400         }
1401         _ => panic!("Did not receive a bios or kernel, should be impossible."),
1402     };
1403 
1404     let swiotlb = if let Some(size) = cfg.swiotlb {
1405         Some(
1406             size.checked_mul(1024 * 1024)
1407                 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
1408         )
1409     } else if matches!(cfg.protection_type, ProtectionType::Unprotected) {
1410         None
1411     } else {
1412         Some(64 * 1024 * 1024)
1413     };
1414 
1415     let (pflash_image, pflash_block_size) = if let Some(pflash_parameters) = &cfg.pflash_parameters
1416     {
1417         (
1418             Some(
1419                 open_file(
1420                     &pflash_parameters.path,
1421                     OpenOptions::new().read(true).write(true),
1422                 )
1423                 .with_context(|| {
1424                     format!("failed to open pflash {}", pflash_parameters.path.display())
1425                 })?,
1426             ),
1427             pflash_parameters.block_size,
1428         )
1429     } else {
1430         (None, 0)
1431     };
1432 
1433     Ok(VmComponents {
1434         memory_size: cfg
1435             .memory
1436             .unwrap_or(256)
1437             .checked_mul(1024 * 1024)
1438             .ok_or_else(|| anyhow!("requested memory size too large"))?,
1439         swiotlb,
1440         vcpu_count: cfg.vcpu_count.unwrap_or(1),
1441         vcpu_affinity: cfg.vcpu_affinity.clone(),
1442         cpu_clusters: cfg.cpu_clusters.clone(),
1443         cpu_capacity: cfg.cpu_capacity.clone(),
1444         no_smt: cfg.no_smt,
1445         hugepages: cfg.hugepages,
1446         hv_cfg: hypervisor::Config {
1447             protection_type: cfg.protection_type,
1448         },
1449         vm_image,
1450         android_fstab: cfg
1451             .android_fstab
1452             .as_ref()
1453             .map(|x| {
1454                 File::open(x).with_exit_context(Exit::OpenAndroidFstab, || {
1455                     format!("failed to open android fstab file {}", x.display())
1456                 })
1457             })
1458             .map_or(Ok(None), |v| v.map(Some))?,
1459         pstore: cfg.pstore.clone(),
1460         pflash_block_size,
1461         pflash_image,
1462         initrd_image,
1463         extra_kernel_params: cfg.params.clone(),
1464         acpi_sdts: cfg
1465             .acpi_tables
1466             .iter()
1467             .map(|path| {
1468                 SDT::from_file(path).with_exit_context(Exit::OpenAcpiTable, || {
1469                     format!("failed to open ACPI file {}", path.display())
1470                 })
1471             })
1472             .collect::<Result<Vec<SDT>>>()?,
1473         rt_cpus: cfg.rt_cpus.clone(),
1474         delay_rt: cfg.delay_rt,
1475         dmi_path: cfg.dmi_path.clone(),
1476         no_i8042: cfg.no_i8042,
1477         no_rtc: cfg.no_rtc,
1478         host_cpu_topology: cfg.host_cpu_topology,
1479         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1480         force_s2idle: cfg.force_s2idle,
1481         itmt: false,
1482         pvm_fw: None,
1483         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1484         pci_low_start: cfg.pci_low_start,
1485         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1486         pcie_ecam: cfg.pcie_ecam,
1487         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1488         oem_strings: cfg.oem_strings.clone(),
1489     })
1490 }
1491 
/// Enum that allows us to assign a variable to what is essentially a &dyn IrqChipArch.
///
/// Variants for hypervisor-specific chips only exist when the matching
/// feature is compiled in.
enum WindowsIrqChip<V: VcpuArch> {
    Userspace(UserspaceIrqChip<V>),
    #[cfg(feature = "gvm")]
    Gvm(GvmIrqChip),
    #[cfg(feature = "whpx")]
    WhpxSplit(WhpxSplitIrqChip),
}
1500 
impl<V: VcpuArch> WindowsIrqChip<V> {
    // Convert our enum to a &mut dyn IrqChipArch
    fn as_mut(&mut self) -> &mut dyn IrqChipArch {
        match self {
            // Each arm erases the concrete chip type behind the trait object;
            // the cfg-gated arms mirror the enum's feature-gated variants.
            WindowsIrqChip::Userspace(i) => i,
            #[cfg(feature = "gvm")]
            WindowsIrqChip::Gvm(i) => i,
            #[cfg(feature = "whpx")]
            WindowsIrqChip::WhpxSplit(i) => i,
        }
    }
}
1513 
/// Storage for the VM TSC offset for each vcpu. Stored in a static because the tracing thread will
/// need access to it when tracing is enabled.
///
/// Indexed by vcpu id; `None` until that vcpu records its offset via
/// `save_vcpu_tsc_offset`.
static TSC_OFFSETS: once_cell::sync::Lazy<sync::Mutex<Vec<Option<u64>>>> =
    once_cell::sync::Lazy::new(|| sync::Mutex::new(Vec::new()));
1518 
1519 /// Save the TSC offset for a particular vcpu.
1520 ///
1521 /// After setting the TSC offset for a vcpu, this function checks the standard deviation of offsets
1522 /// for all the VCPUs and logs this information. If the TSC offsets differ too much between vcpus
1523 /// it can cause clock issues in the guest.
save_vcpu_tsc_offset(offset: u64, vcpu_id: usize)1524 pub fn save_vcpu_tsc_offset(offset: u64, vcpu_id: usize) {
1525     let offsets_copy = {
1526         let mut offsets = TSC_OFFSETS.lock();
1527         // make sure offsets vec is large enough before inserting
1528         let newlen = std::cmp::max(offsets.len(), vcpu_id + 1);
1529         offsets.resize(newlen, None);
1530         offsets[vcpu_id] = Some(offset);
1531 
1532         offsets.clone()
1533     };
1534 
1535     // do statistics on a clone of the offsets so we don't hold up other vcpus at this point
1536     info!(
1537         "TSC offset standard deviation is: {}",
1538         standard_deviation(
1539             &offsets_copy
1540                 .iter()
1541                 .filter(|x| x.is_some())
1542                 .map(|x| x.unwrap() as u128)
1543                 .collect::<Vec<u128>>()
1544         )
1545     );
1546 }
1547 
/// Get the TSC offset of any vcpu. It will pick the first non-None offset it finds in TSC_OFFSETS.
///
/// Returns 0 when no vcpu has recorded an offset yet.
#[cfg(feature = "perfetto")]
pub fn get_vcpu_tsc_offset() -> u64 {
    TSC_OFFSETS
        .lock()
        .iter()
        .flatten()
        .next()
        .copied()
        .unwrap_or(0)
}
1556 
/// Callback that is registered with tracing crate, and will be called by the tracing thread when
/// tracing is enabled or disabled. Regardless of whether tracing is being enabled or disabled for
/// a given category or instance, we just emit a clock snapshot that maps the guest TSC to the
/// host TSC. Redundant snapshots should not be a problem for perfetto.
#[cfg(feature = "perfetto")]
fn set_tsc_clock_snapshot() {
    let freq = match devices::tsc::tsc_frequency() {
        Ok(freq) => freq,
        Err(e) => {
            error!(
                "Could not determine tsc frequency, unable to snapshot tsc offset: {}",
                e
            );
            return;
        }
    };

    // The offset is host-guest tsc value
    let offset = get_vcpu_tsc_offset();
    // SAFETY: _rdtsc takes no arguments.
    let host_tsc = unsafe { std::arch::x86_64::_rdtsc() };

    // Technically our multiplier should be freq/1_000_000_000, but perfetto doesn't
    // support floating point multipliers yet. So for now we set the freq in Hz and rely
    // on the merge tool to fix it.
    let guest_clock = perfetto::Clock::new(
        perfetto::BuiltinClock::Tsc as u32,
        host_tsc.wrapping_add(offset),
    )
    .set_multiplier(freq as u64);

    // The host builtin clock ids are all offset from the guest ids by
    // HOST_GUEST_CLOCK_ID_OFFSET when the traces are merged. Because this snapshot
    // contains both a guest and host clock, we need to offset it before merge.
    let host_clock = perfetto::Clock::new(
        perfetto::BuiltinClock::Tsc as u32 + cros_tracing::HOST_GUEST_CLOCK_ID_OFFSET,
        host_tsc,
    )
    .set_multiplier(freq as u64);

    perfetto::snapshot_clock(perfetto::ClockSnapshot::new(guest_clock, host_clock));
}
1597 
/// Launches run_config for the broker, reading configuration from a TubeTransporter.
///
/// Everything needed to run the VM (the `Config`, startup args, shutdown event, crash tube map,
/// and vm event tubes) arrives over the bootstrap tube in a fixed sequence of `recv` calls;
/// the order here must match the broker's send order.
pub fn run_config_for_broker(raw_tube_transporter: RawDescriptor) -> Result<ExitState> {
    // SAFETY: we know that raw_transport_tube is valid (passed by inheritance), and that
    // the blocking & framing modes are accurate because we create them ourselves in the broker.
    let tube_transporter =
        unsafe { TubeTransporterReader::from_raw_descriptor(raw_tube_transporter) };

    let mut tube_data_list = tube_transporter
        .read_tubes()
        .exit_context(Exit::TubeTransporterInit, "failed to init tube transporter")?;

    let bootstrap_tube = tube_data_list
        .get_tube(TubeToken::Bootstrap)
        .exit_context(Exit::TubeFailure, "failed to get bootstrap tube")?;

    // 1. The full VM configuration.
    let mut cfg: Config = bootstrap_tube
        .recv::<Config>()
        .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?;

    // 2. Common child-process startup arguments, consumed by common_child_setup; the returned
    //    guard is held for the lifetime of this function.
    let startup_args: CommonChildStartupArgs = bootstrap_tube
        .recv::<CommonChildStartupArgs>()
        .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?;
    let _child_cleanup = common_child_setup(startup_args).exit_context(
        Exit::CommonChildSetupError,
        "failed to perform common child setup",
    )?;

    // 3. The broker shutdown event, stored into the config for run_control to wait on.
    cfg.broker_shutdown_event = Some(
        bootstrap_tube
            .recv::<Event>()
            .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?,
    );
    // 4. Crash tubes keyed by process type. NOTE(review): this is received unconditionally but
    //    only used when the crash-report feature is enabled.
    let crash_tube_map = bootstrap_tube
        .recv::<HashMap<ProcessType, Vec<SendTube>>>()
        .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?;
    #[cfg(feature = "crash-report")]
    crash_report::set_crash_tube_map(crash_tube_map);

    // 5. The vm event tubes, created broker-side (unlike run_config, which creates them itself).
    let BrokerTubes {
        vm_evt_wrtube,
        vm_evt_rdtube,
    } = bootstrap_tube
        .recv::<BrokerTubes>()
        .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?;

    run_config_inner(cfg, vm_evt_wrtube, vm_evt_rdtube)
}
1645 
run_config(cfg: Config) -> Result<ExitState>1646 pub fn run_config(cfg: Config) -> Result<ExitState> {
1647     let _raise_timer_resolution = enable_high_res_timers()
1648         .exit_context(Exit::EnableHighResTimer, "failed to enable high res timer")?;
1649 
1650     // There is no broker when using run_config(), so the vm_evt tubes need to be created.
1651     let (vm_evt_wrtube, vm_evt_rdtube) =
1652         Tube::directional_pair().context("failed to create vm event tube")?;
1653 
1654     run_config_inner(cfg, vm_evt_wrtube, vm_evt_rdtube)
1655 }
1656 
create_guest_memory( components: &VmComponents, hypervisor: &impl Hypervisor, ) -> Result<GuestMemory>1657 fn create_guest_memory(
1658     components: &VmComponents,
1659     hypervisor: &impl Hypervisor,
1660 ) -> Result<GuestMemory> {
1661     let guest_mem_layout = Arch::guest_memory_layout(components, hypervisor).exit_context(
1662         Exit::GuestMemoryLayout,
1663         "failed to create guest memory layout",
1664     )?;
1665     GuestMemory::new_with_options(&guest_mem_layout)
1666         .exit_context(Exit::CreateGuestMemory, "failed to create guest memory")
1667 }
1668 
/// Shared tail of `run_config` and `run_config_for_broker`: selects a hypervisor, builds the
/// hypervisor-specific VM and irqchip, and hands everything to `run_vm`.
fn run_config_inner(
    cfg: Config,
    vm_evt_wrtube: SendTube,
    vm_evt_rdtube: RecvTube,
) -> Result<ExitState> {
    product::setup_common_metric_invariants(&cfg);

    // Register the TSC snapshot callback so perfetto traces can map guest TSC to host TSC.
    #[cfg(feature = "perfetto")]
    cros_tracing::add_per_trace_callback(set_tsc_clock_snapshot);

    let components: VmComponents = setup_vm_components(&cfg)?;

    // Use the hypervisor from the config if one was specified, otherwise fall back to the
    // build's default.
    let default_hypervisor = get_default_hypervisor()
        .exit_context(Exit::NoDefaultHypervisor, "no enabled hypervisor")?;
    #[allow(unused_mut)]
    let mut hypervisor = cfg.hypervisor.unwrap_or(default_hypervisor);

    #[cfg(feature = "whpx")]
    if hypervisor::whpx::Whpx::is_enabled() {
        // If WHPX is enabled, no other hypervisor can be used, so just override it
        hypervisor = HypervisorKind::Whpx;
    }

    match hypervisor {
        #[cfg(feature = "haxm")]
        HypervisorKind::Haxm | HypervisorKind::Ghaxm => {
            // Plain Haxm (as opposed to Ghaxm) disables the "ghaxm" mode globally.
            if hypervisor == HypervisorKind::Haxm {
                set_use_ghaxm(false);
            }
            info!("Creating HAXM ghaxm={}", get_use_ghaxm());
            let haxm = Haxm::new()?;
            let guest_mem = create_guest_memory(&components, &haxm)?;
            let vm = create_haxm_vm(haxm, guest_mem, &cfg.kernel_log_file)?;
            // HAXM always uses a userspace irqchip; the host end of the IOAPIC tube is passed
            // to run_vm so the main loop can service IOAPIC requests.
            let (ioapic_host_tube, ioapic_device_tube) =
                Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
            let irq_chip = create_userspace_irq_chip::<HaxmVm, HaxmVcpu>(
                components.vcpu_count,
                ioapic_device_tube,
            )?;
            run_vm::<HaxmVcpu, HaxmVm>(
                cfg,
                components,
                vm,
                WindowsIrqChip::Userspace(irq_chip).as_mut(),
                Some(ioapic_host_tube),
                vm_evt_wrtube,
                vm_evt_rdtube,
            )
        }
        #[cfg(feature = "whpx")]
        HypervisorKind::Whpx => {
            let apic_emulation_supported =
                Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
                    .exit_context(Exit::WhpxSetupError, "failed to set up whpx")?;

            let no_smt = cfg.no_smt;

            // Default to WhpxSplitIrqChip if it's supported because it's more performant
            let irq_chip = cfg.irq_chip.unwrap_or(if apic_emulation_supported {
                IrqChipKind::Split
            } else {
                IrqChipKind::Userspace
            });

            // Both WHPX irq chips use a userspace IOAPIC
            let (ioapic_host_tube, ioapic_device_tube) =
                Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;

            info!("Creating Whpx");
            let whpx = Whpx::new()?;
            let guest_mem = create_guest_memory(&components, &whpx)?;
            let vm = create_whpx_vm(
                whpx,
                guest_mem,
                components.vcpu_count,
                no_smt,
                // Local APIC emulation is only enabled when the split irqchip is in use.
                apic_emulation_supported && irq_chip == IrqChipKind::Split,
                cfg.force_calibrated_tsc_leaf,
                vm_evt_wrtube
                    .try_clone()
                    .expect("could not clone vm_evt_wrtube"),
            )?;

            let mut irq_chip = match irq_chip {
                IrqChipKind::Kernel => unimplemented!("Kernel irqchip mode not supported by WHPX"),
                IrqChipKind::Split => {
                    // Reachable when the user explicitly requested split mode on a WHPX
                    // version without local APIC emulation.
                    if !apic_emulation_supported {
                        panic!(
                            "split irqchip specified but your WHPX version does not support \
                               local apic emulation"
                        );
                    }
                    WindowsIrqChip::WhpxSplit(create_whpx_split_irq_chip(&vm, ioapic_device_tube)?)
                }
                IrqChipKind::Userspace => {
                    WindowsIrqChip::Userspace(create_userspace_irq_chip::<WhpxVm, WhpxVcpu>(
                        components.vcpu_count,
                        ioapic_device_tube,
                    )?)
                }
            };
            run_vm::<WhpxVcpu, WhpxVm>(
                cfg,
                components,
                vm,
                irq_chip.as_mut(),
                Some(ioapic_host_tube),
                vm_evt_wrtube,
                vm_evt_rdtube,
            )
        }
        #[cfg(feature = "gvm")]
        HypervisorKind::Gvm => {
            info!("Creating GVM");
            let gvm = Gvm::new()?;
            let guest_mem = create_guest_memory(&components, &gvm)?;
            let vm = create_gvm_vm(gvm, guest_mem)?;
            // Only the userspace irqchip needs an IOAPIC tube; kernel mode leaves it None.
            let ioapic_host_tube;
            let mut irq_chip = match cfg.irq_chip.unwrap_or(IrqChipKind::Kernel) {
                IrqChipKind::Split => unimplemented!("Split irqchip mode not supported by GVM"),
                IrqChipKind::Kernel => {
                    ioapic_host_tube = None;
                    WindowsIrqChip::Gvm(create_gvm_irq_chip(&vm, components.vcpu_count)?)
                }
                IrqChipKind::Userspace => {
                    let (host_tube, ioapic_device_tube) =
                        Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
                    ioapic_host_tube = Some(host_tube);
                    WindowsIrqChip::Userspace(create_userspace_irq_chip::<GvmVm, GvmVcpu>(
                        components.vcpu_count,
                        ioapic_device_tube,
                    )?)
                }
            };
            run_vm::<GvmVcpu, GvmVm>(
                cfg,
                components,
                vm,
                irq_chip.as_mut(),
                ioapic_host_tube,
                vm_evt_wrtube,
                vm_evt_rdtube,
            )
        }
    }
}
1815 
/// Builds all of the VM's remaining infrastructure (control tubes, devices, memory regions,
/// the guest itself via `Arch::build_vm`) and then enters `run_control`, which runs the VM
/// until exit.
///
/// `vm` and `irq_chip` are created by the caller because their construction is
/// hypervisor-specific; `ioapic_host_tube` is `Some` only for irqchip modes that use a
/// userspace IOAPIC.
#[cfg(any(feature = "haxm", feature = "gvm", feature = "whpx"))]
fn run_vm<Vcpu, V>(
    #[allow(unused_mut)] mut cfg: Config,
    #[allow(unused_mut)] mut components: VmComponents,
    mut vm: V,
    irq_chip: &mut dyn IrqChipArch,
    ioapic_host_tube: Option<Tube>,
    vm_evt_wrtube: SendTube,
    vm_evt_rdtube: RecvTube,
) -> Result<ExitState>
where
    Vcpu: VcpuArch + 'static,
    V: VmArch + 'static,
{
    let vm_memory_size_mb = components.memory_size / (1024 * 1024);
    let mut control_tubes = Vec::new();
    let mut irq_control_tubes = Vec::new();
    // Create one control tube per disk.
    let mut disk_device_tubes = Vec::new();
    let mut disk_host_tubes = Vec::new();
    let disk_count = cfg.disks.len();
    for _ in 0..disk_count {
        let (disk_host_tube, disk_device_tube) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        disk_host_tubes.push(disk_host_tube);
        disk_device_tubes.push(disk_device_tube);
    }

    if let Some(ioapic_host_tube) = ioapic_host_tube {
        irq_control_tubes.push(ioapic_host_tube);
    }

    // Balloon gets a special socket so balloon requests can be forwarded from the main process.
    let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
        let (balloon_host_tube, balloon_device_tube) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        (Some(balloon_host_tube), Some(balloon_device_tube))
    } else {
        (None, None)
    };
    // The balloon device also needs a tube to communicate back to the main process to
    // handle remapping memory dynamically.
    let dynamic_mapping_device_tube = if cfg.balloon {
        let (dynamic_mapping_host_tube, dynamic_mapping_device_tube) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        control_tubes.push(TaggedControlTube::VmMemory(dynamic_mapping_host_tube));
        Some(dynamic_mapping_device_tube)
    } else {
        None
    };

    // PvClock gets a tube for handling suspend/resume requests from the main thread.
    let (pvclock_host_tube, pvclock_device_tube) = if cfg.pvclock {
        let (host, device) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        (Some(host), Some(device))
    } else {
        (None, None)
    };

    let gralloc =
        RutabagaGralloc::new().exit_context(Exit::CreateGralloc, "failed to create gralloc")?;

    let pstore_size = components.pstore.as_ref().map(|pstore| pstore.size as u64);
    let mut sys_allocator = SystemAllocator::new(
        Arch::get_system_allocator_config(&vm),
        pstore_size,
        &cfg.mmio_address_ranges,
    )
    .context("failed to create system allocator")?;

    // One host/device tube pair per configured AC97 device (audio feature only).
    #[allow(unused_mut)]
    let mut ac97_host_tubes = Vec::new();
    #[allow(unused_mut)]
    let mut ac97_device_tubes = Vec::new();
    #[cfg(feature = "audio")]
    for _ in &cfg.ac97_parameters {
        let (ac97_host_tube, ac97_device_tube) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        ac97_host_tubes.push(ac97_host_tube);
        ac97_device_tubes.push(ac97_device_tube);
    }

    // Allocate the ramoops region first.
    let ramoops_region = match &components.pstore {
        Some(pstore) => Some(
            arch::pstore::create_memory_region(
                &mut vm,
                sys_allocator.reserved_region().unwrap(),
                pstore,
            )
            .exit_context(
                Exit::Pstore,
                format!("failed to allocate pstore region {:?}", &components.pstore),
            )?,
        ),
        None => None,
    };

    // The initial balloon size is the difference between total memory and the configured
    // initial memory (in MiB); defaults to 0 when no init_memory is configured.
    let init_balloon_size = components
        .memory_size
        .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
            m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
        }))
        .context("failed to calculate init balloon size")?;

    let tsc_state = devices::tsc::tsc_state().exit_code(Exit::TscCalibrationFailed)?;
    let tsc_sync_mitigations = get_tsc_sync_mitigations(&tsc_state, components.vcpu_count);

    if tsc_state.core_grouping.size() > 1 {
        // Host TSCs are not in sync, log a metric about it.
        warn!(
            "Host TSCs are not in sync, applying the following mitigations: {:?}",
            tsc_sync_mitigations
        );
        log_descriptor(
            MetricEventType::TscCoresOutOfSync,
            // casting u64 as i64 is a no-op, so we don't lose any part of the bitmask
            tsc_state.core_grouping.core_grouping_bitmask() as i64,
        );
    }

    let product_args = product::get_run_control_args(&mut cfg);

    let virtio_snd_state_device_tube = create_snd_state_tube(&mut control_tubes)?;

    let (virtio_snd_host_mute_tube, virtio_snd_device_mute_tube) = create_snd_mute_tube_pair()?;

    // Hand the device ends of all the tubes created above to the PCI devices.
    let pci_devices = create_devices(
        &mut cfg,
        vm.get_memory(),
        &vm_evt_wrtube,
        &mut irq_control_tubes,
        &mut control_tubes,
        &mut disk_device_tubes,
        balloon_device_tube,
        pvclock_device_tube,
        dynamic_mapping_device_tube,
        /* inflate_tube= */ None,
        init_balloon_size,
        ac97_host_tubes,
        tsc_state.frequency,
        virtio_snd_state_device_tube,
        virtio_snd_device_mute_tube,
    )?;

    let mut vcpu_ids = Vec::new();

    // Consumes `vm` and `pci_devices`; `windows` is the RunnableLinuxVm handed to run_control.
    let windows = Arch::build_vm::<V, Vcpu>(
        components,
        &vm_evt_wrtube,
        &mut sys_allocator,
        &cfg.serial_parameters,
        None,
        (cfg.battery_config.as_ref().map(|t| t.type_), None),
        vm,
        ramoops_region,
        pci_devices,
        irq_chip,
        &mut vcpu_ids,
        cfg.dump_device_tree_blob.clone(),
        /*debugcon_jail=*/ None,
        None,
    )
    .exit_context(Exit::BuildVm, "the architecture failed to build the vm")?;

    // Exit-time statistics collection is optional and only built with the stats feature.
    #[cfg(feature = "stats")]
    let stats = if cfg.exit_stats {
        Some(Arc::new(Mutex::new(StatisticsCollector::new())))
    } else {
        None
    };

    run_control(
        windows,
        sys_allocator,
        control_tubes,
        irq_control_tubes,
        vm_evt_rdtube,
        vm_evt_wrtube,
        cfg.broker_shutdown_event.take(),
        balloon_host_tube,
        pvclock_host_tube,
        gralloc,
        #[cfg(feature = "stats")]
        stats,
        cfg.service_pipe_name,
        ac97_device_tubes,
        vm_memory_size_mb,
        cfg.host_cpu_topology,
        tsc_sync_mitigations,
        cfg.force_calibrated_tsc_leaf,
        product_args,
        virtio_snd_host_mute_tube,
        cfg.restore_path,
    )
}
2013 
#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use super::*;

    /// Builds a minimal `Config` whose kernel path points at an empty placeholder file
    /// inside `test_dir`, since setup_vm_components needs an executable path on disk.
    fn create_config(test_dir: &TempDir) -> Config {
        let mut config = Config::default();

        let dummy_kernel_path = test_dir.path().join("dummy_kernel.txt");
        OpenOptions::new()
            .create(true)
            .write(true)
            .open(&dummy_kernel_path)
            .expect("Could not open file!");
        config.executable_path = Some(Executable::Kernel(dummy_kernel_path));

        config
    }

    #[test]
    #[should_panic(expected = "Did not receive a bios or kernel")]
    fn setup_vm_components_panics_when_no_kernel_provided() {
        let mut config =
            create_config(&TempDir::new().expect("Could not create temporary directory!"));
        config.executable_path = None;
        let _ = setup_vm_components(&config);
    }

    #[test]
    fn setup_vm_components_stores_memory_in_bytes() {
        let tempdir = TempDir::new().expect("Could not create temporary directory!");
        let mut config = create_config(&tempdir);
        config.memory = Some(1);
        let vm_components = setup_vm_components(&config).expect("failed to setup vm components");
        assert_eq!(vm_components.memory_size, 1024 * 1024);
    }

    #[test]
    fn setup_vm_components_fails_when_memory_too_large() {
        let tempdir = TempDir::new().expect("Could not create temporary directory!");
        let mut config = create_config(&tempdir);
        // One mb more than a u64 can hold in bytes
        config.memory = Some((u64::MAX / 1024 / 1024) + 1);
        // Use is_err() rather than err().expect()/expect_err() so the Ok type need not be Debug.
        assert!(setup_vm_components(&config).is_err());
    }
}
2061