• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! Virtual machine architecture support code.
6 
7 pub mod android;
8 pub mod pstore;
9 pub mod serial;
10 
11 pub mod sys;
12 
13 use std::collections::BTreeMap;
14 use std::error::Error as StdError;
15 use std::fs::File;
16 use std::io;
17 use std::io::Read;
18 use std::io::Seek;
19 use std::io::SeekFrom;
20 use std::ops::Deref;
21 use std::path::PathBuf;
22 use std::str::FromStr;
23 use std::sync::mpsc;
24 use std::sync::mpsc::SendError;
25 use std::sync::Arc;
26 
27 use acpi_tables::sdt::SDT;
28 use base::syslog;
29 use base::AsRawDescriptor;
30 use base::AsRawDescriptors;
31 use base::Event;
32 use base::SendTube;
33 use base::Tube;
34 use devices::virtio::VirtioDevice;
35 use devices::BarRange;
36 use devices::Bus;
37 use devices::BusDevice;
38 use devices::BusDeviceObj;
39 use devices::BusError;
40 use devices::BusResumeDevice;
41 use devices::HotPlugBus;
42 use devices::IrqChip;
43 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
44 use devices::IrqChipAArch64 as IrqChipArch;
45 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
46 use devices::IrqChipX86_64 as IrqChipArch;
47 use devices::IrqEventSource;
48 use devices::PciAddress;
49 use devices::PciBus;
50 use devices::PciDevice;
51 use devices::PciDeviceError;
52 use devices::PciInterruptPin;
53 use devices::PciRoot;
54 use devices::PciRootCommand;
55 use devices::PreferredIrq;
56 #[cfg(unix)]
57 use devices::ProxyDevice;
58 use devices::SerialHardware;
59 use devices::SerialParameters;
60 use devices::VirtioMmioDevice;
61 #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
62 use gdbstub::arch::Arch;
63 #[cfg(all(target_arch = "aarch64", feature = "gdb"))]
64 use gdbstub_arch::aarch64::AArch64 as GdbArch;
65 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
66 use gdbstub_arch::x86::X86_64_SSE as GdbArch;
67 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
68 use hypervisor::CpuConfigAArch64 as CpuConfigArch;
69 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
70 use hypervisor::CpuConfigX86_64 as CpuConfigArch;
71 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
72 use hypervisor::Hypervisor as HypervisorArch;
73 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
74 use hypervisor::HypervisorX86_64 as HypervisorArch;
75 use hypervisor::IoEventAddress;
76 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
77 use hypervisor::VcpuAArch64 as VcpuArch;
78 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
79 use hypervisor::VcpuInitAArch64 as VcpuInitArch;
80 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
81 use hypervisor::VcpuInitX86_64 as VcpuInitArch;
82 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
83 use hypervisor::VcpuX86_64 as VcpuArch;
84 use hypervisor::Vm;
85 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
86 use hypervisor::VmAArch64 as VmArch;
87 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
88 use hypervisor::VmX86_64 as VmArch;
89 #[cfg(windows)]
90 use jail::FakeMinijailStub as Minijail;
91 #[cfg(unix)]
92 use minijail::Minijail;
93 use remain::sorted;
94 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
95 use resources::AddressRange;
96 use resources::SystemAllocator;
97 use resources::SystemAllocatorConfig;
98 use serde::de::Visitor;
99 use serde::Deserialize;
100 use serde::Serialize;
101 use serde_keyvalue::FromKeyValues;
102 pub use serial::add_serial_devices;
103 pub use serial::get_serial_cmdline;
104 pub use serial::set_default_serial_parameters;
105 pub use serial::GetSerialCmdlineError;
106 pub use serial::SERIAL_ADDR;
107 use sync::Mutex;
108 use thiserror::Error;
109 use vm_control::BatControl;
110 use vm_control::BatteryType;
111 use vm_control::PmResource;
112 use vm_memory::GuestAddress;
113 use vm_memory::GuestMemory;
114 use vm_memory::GuestMemoryError;
115 use vm_memory::MemoryRegionOptions;
116 
/// The image a VM is started from, held as an already-opened file.
pub enum VmImage {
    /// A kernel image file.
    Kernel(File),
    /// A BIOS image file.
    Bios(File),
}
121 
/// Configuration of the pstore backing store.
///
/// Can be (de)serialized with serde using kebab-case field names; unknown fields are rejected.
#[derive(Clone, Debug, Deserialize, Serialize, FromKeyValues, PartialEq, Eq)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct Pstore {
    /// Path of the pstore file. NOTE(review): presumably a host-side path - confirm with users.
    pub path: PathBuf,
    /// Size of the pstore region. NOTE(review): presumably in bytes - confirm with users.
    pub size: u32,
}
128 
/// Collection of CPU core indices.
///
/// The indices are kept in a `Vec` exactly as supplied: insertion order is preserved and
/// duplicates are not removed.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct CpuSet(Vec<usize>);

impl CpuSet {
    /// Builds a `CpuSet` from any iterable of core indices.
    pub fn new<I: IntoIterator<Item = usize>>(cpus: I) -> Self {
        let cores: Vec<usize> = cpus.into_iter().collect();
        Self(cores)
    }

    /// Returns a borrowing iterator over the stored core indices, in stored order.
    pub fn iter(&self) -> std::slice::Iter<'_, usize> {
        let Self(cores) = self;
        cores.iter()
    }
}
142 
/// Parses a single CPU specification and appends the CPUs it names to `cpuset`.
///
/// `s` is either one index (`"3"`) or an inclusive, ascending range (`"5-8"`). On error nothing
/// is appended and a human-readable message is returned.
fn parse_cpu_range(s: &str, cpuset: &mut Vec<usize>) -> Result<(), String> {
    // Converts one textual index, mapping any parse failure to a descriptive message.
    let parse_index = |text: &str| -> Result<usize, String> {
        text.parse::<usize>().map_err(|_| {
            format!(
                "invalid CPU index {} - index must be a non-negative integer",
                text
            )
        })
    };

    let bounds = if let Some((low_text, high_text)) = s.split_once('-') {
        // The lower bound is parsed first so that its error takes precedence.
        let low = parse_index(low_text)?;
        let high = parse_index(high_text)?;

        if high < low {
            return Err(format!(
                "invalid CPU range {} - ranges must be from low to high",
                s
            ));
        }
        low..=high
    } else {
        // No dash: a single CPU is treated as a one-element range.
        let only = parse_index(s)?;
        only..=only
    };

    cpuset.extend(bounds);

    Ok(())
}
176 
177 impl FromStr for CpuSet {
178     type Err = String;
179 
from_str(s: &str) -> Result<Self, Self::Err>180     fn from_str(s: &str) -> Result<Self, Self::Err> {
181         let mut cpuset = Vec::new();
182         for part in s.split(',') {
183             parse_cpu_range(part, &mut cpuset)?;
184         }
185         Ok(CpuSet::new(cpuset))
186     }
187 }
188 
189 impl Deref for CpuSet {
190     type Target = Vec<usize>;
191 
deref(&self) -> &Self::Target192     fn deref(&self) -> &Self::Target {
193         &self.0
194     }
195 }
196 
197 impl IntoIterator for CpuSet {
198     type Item = usize;
199     type IntoIter = std::vec::IntoIter<Self::Item>;
200 
into_iter(self) -> Self::IntoIter201     fn into_iter(self) -> Self::IntoIter {
202         self.0.into_iter()
203     }
204 }
205 
206 /// Deserializes a `CpuSet` from a sequence which elements can either be integers, or strings
207 /// representing CPU ranges (e.g. `5-8`).
208 impl<'de> Deserialize<'de> for CpuSet {
deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: serde::Deserializer<'de>,209     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
210     where
211         D: serde::Deserializer<'de>,
212     {
213         struct CpuSetVisitor;
214         impl<'de> Visitor<'de> for CpuSetVisitor {
215             type Value = CpuSet;
216 
217             fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
218                 formatter.write_str("CpuSet")
219             }
220 
221             fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
222             where
223                 A: serde::de::SeqAccess<'de>,
224             {
225                 #[derive(Deserialize)]
226                 #[serde(untagged)]
227                 enum CpuSetValue<'a> {
228                     Single(usize),
229                     Range(&'a str),
230                 }
231 
232                 let mut cpus = Vec::new();
233                 while let Some(cpuset) = seq.next_element::<CpuSetValue>()? {
234                     match cpuset {
235                         CpuSetValue::Single(cpu) => cpus.push(cpu),
236                         CpuSetValue::Range(range) => {
237                             parse_cpu_range(range, &mut cpus).map_err(serde::de::Error::custom)?;
238                         }
239                     }
240                 }
241 
242                 Ok(CpuSet::new(cpus))
243             }
244         }
245 
246         deserializer.deserialize_seq(CpuSetVisitor)
247     }
248 }
249 
250 /// Serializes a `CpuSet` into a sequence of integers and strings representing CPU ranges.
251 impl Serialize for CpuSet {
serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer,252     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
253     where
254         S: serde::Serializer,
255     {
256         use serde::ser::SerializeSeq;
257 
258         let mut seq = serializer.serialize_seq(None)?;
259 
260         // Factorize ranges into "a-b" strings.
261         let mut serialize_range = |start: usize, end: usize| -> Result<(), S::Error> {
262             if start == end {
263                 seq.serialize_element(&start)?;
264             } else {
265                 seq.serialize_element(&format!("{}-{}", start, end))?;
266             }
267 
268             Ok(())
269         };
270 
271         // Current range.
272         let mut range = None;
273         for core in &self.0 {
274             range = match range {
275                 None => Some((core, core)),
276                 Some((start, end)) if *end == *core - 1 => Some((start, core)),
277                 Some((start, end)) => {
278                     serialize_range(*start, *end)?;
279                     Some((core, core))
280                 }
281             };
282         }
283 
284         if let Some((start, end)) = range {
285             serialize_range(*start, *end)?;
286         }
287 
288         seq.end()
289     }
290 }
291 
/// Mapping of guest VCPU threads to host CPU cores.
///
/// Either one `CpuSet` shared by every VCPU, or an individual `CpuSet` per VCPU index.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub enum VcpuAffinity {
    /// All VCPU threads will be pinned to the same set of host CPU cores.
    Global(CpuSet),
    /// Each VCPU may be pinned to a set of host CPU cores.
    /// The map key is a guest VCPU index, and the corresponding value is the set of
    /// host CPU indices that the VCPU thread will be allowed to run on.
    /// If a VCPU index is not present in the map, its affinity will not be set.
    PerVcpu(BTreeMap<usize, CpuSet>),
}
303 
/// Holds the pieces needed to build a VM. Passed to `build_vm` in the `LinuxArch` trait below to
/// create a `RunnableLinuxVm`.
///
/// Several fields only exist for particular target architectures, operating systems or cargo
/// features; see the `cfg` attribute on each field.
// NOTE(review): `#[sorted]` (from the `remain` crate) presumably enforces that the fields stay in
// alphabetical order - keep new fields sorted.
#[sorted]
pub struct VmComponents {
    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), unix))]
    pub ac_adapter: bool,
    /// Additional ACPI system description tables.
    pub acpi_sdts: Vec<SDT>,
    pub android_fstab: Option<File>,
    /// Per-CPU capacity values, keyed by CPU index. NOTE(review): units not visible here.
    pub cpu_capacity: BTreeMap<usize, u32>,
    /// Groups of CPUs, one `CpuSet` per cluster.
    pub cpu_clusters: Vec<CpuSet>,
    pub delay_rt: bool,
    #[cfg(feature = "direct")]
    pub direct_fixed_evts: Vec<devices::ACPIPMFixedEvent>,
    #[cfg(feature = "direct")]
    pub direct_gpe: Vec<u32>,
    pub dmi_path: Option<PathBuf>,
    /// Extra parameters for the kernel. NOTE(review): presumably appended to the kernel command
    /// line - confirm in the per-architecture `build_vm`.
    pub extra_kernel_params: Vec<String>,
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub force_s2idle: bool,
    #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
    pub gdb: Option<(u32, Tube)>, // port and control tube.
    pub host_cpu_topology: bool,
    pub hugepages: bool,
    /// Hypervisor configuration.
    pub hv_cfg: hypervisor::Config,
    /// Optional initrd image file.
    pub initrd_image: Option<File>,
    pub itmt: bool,
    /// Guest memory size. NOTE(review): presumably in bytes - confirm.
    pub memory_size: u64,
    pub no_i8042: bool,
    pub no_rtc: bool,
    pub no_smt: bool,
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub oem_strings: Vec<String>,
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub pci_low_start: Option<u64>,
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub pcie_ecam: Option<AddressRange>,
    pub pflash_block_size: u32,
    pub pflash_image: Option<File>,
    /// Optional pstore configuration.
    pub pstore: Option<Pstore>,
    /// A file to load as pVM firmware. Must be `Some` iff
    /// `hv_cfg.protection_type == ProtectionType::UnprotectedWithFirmware`.
    pub pvm_fw: Option<File>,
    pub rt_cpus: CpuSet,
    pub swiotlb: Option<u64>,
    /// Optional pinning of VCPU threads to host CPU cores.
    pub vcpu_affinity: Option<VcpuAffinity>,
    /// Number of VCPUs.
    pub vcpu_count: usize,
    /// Kernel or BIOS image to boot.
    pub vm_image: VmImage,
}
352 
/// Holds the elements needed to run a Linux VM. Created by `build_vm`.
///
/// `V` is the architecture-specific VM type and `Vcpu` the architecture-specific VCPU type.
// NOTE(review): `#[sorted]` (from the `remain` crate) presumably enforces that the fields stay in
// alphabetical order - keep new fields sorted.
#[sorted]
pub struct RunnableLinuxVm<V: VmArch, Vcpu: VcpuArch> {
    pub bat_control: Option<BatControl>,
    pub delay_rt: bool,
    /// Join handle of the devices thread, if there is one.
    pub devices_thread: Option<std::thread::JoinHandle<()>>,
    #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
    pub gdb: Option<(u32, Tube)>,
    pub has_bios: bool,
    /// Hot-plug capable buses, keyed by a `u8`. NOTE(review): presumably the PCI bus number.
    pub hotplug_bus: BTreeMap<u8, Arc<Mutex<dyn HotPlugBus>>>,
    pub io_bus: Arc<Bus>,
    pub irq_chip: Box<dyn IrqChipArch>,
    /// Bus that `configure_pci_device` inserts device BAR ranges into.
    pub mmio_bus: Arc<Bus>,
    pub no_smt: bool,
    /// Debug labels of jailed device processes, keyed by process id.
    pub pid_debug_label_map: BTreeMap<u32, String>,
    #[cfg(unix)]
    pub platform_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
    pub pm: Option<Arc<Mutex<dyn PmResource + Send>>>,
    /// Devices to be notified before the system resumes from the S3 suspended state.
    pub resume_notify_devices: Vec<Arc<Mutex<dyn BusResumeDevice>>>,
    pub root_config: Arc<Mutex<PciRoot>>,
    pub rt_cpus: CpuSet,
    pub suspend_evt: Event,
    pub vcpu_affinity: Option<VcpuAffinity>,
    pub vcpu_count: usize,
    /// Per-VCPU initialization data, passed to `LinuxArch::configure_vcpu` as `vcpu_init`.
    pub vcpu_init: Vec<VcpuInitArch>,
    /// If vcpus is None, then it's the responsibility of the vcpu thread to create vcpus.
    /// If it's Some, then `build_vm` already created the vcpus.
    pub vcpus: Option<Vec<Vcpu>>,
    pub vm: V,
    pub vm_request_tube: Option<Tube>,
}
385 
/// A virtio device together with the optional jail it should run in.
pub struct VirtioDeviceStub {
    /// The virtio device itself.
    pub dev: Box<dyn VirtioDevice>,
    /// Jail for the device; `None` means no jail was supplied.
    pub jail: Option<Minijail>,
}
391 
/// Trait which is implemented for each Linux Architecture in order to
/// set up the memory, cpus, and system devices and to boot the kernel.
pub trait LinuxArch {
    /// Architecture-specific error type returned by the fallible methods of this trait.
    type Error: StdError;

    /// Returns a Vec of the valid memory regions as (address, length, options) tuples. These
    /// should be used to configure the `GuestMemory` structure for the platform.
    ///
    /// # Arguments
    ///
    /// * `components` - Parts used to determine the memory layout.
    /// * `hypervisor` - The hypervisor the VM will run on.
    fn guest_memory_layout(
        components: &VmComponents,
        hypervisor: &impl hypervisor::Hypervisor,
    ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>;

    /// Gets the configuration for a new `SystemAllocator` that fits the given `Vm`'s memory layout.
    ///
    /// This is the per-architecture template for constructing the `SystemAllocator`. Platform
    /// agnostic modifications may be made to this configuration, but the final `SystemAllocator`
    /// will be at least as strict as this configuration.
    ///
    /// # Arguments
    ///
    /// * `vm` - The virtual machine to be used as a template for the `SystemAllocator`.
    fn get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig;

    /// Takes `VmComponents` and generates a `RunnableLinuxVm`.
    ///
    /// # Arguments
    ///
    /// * `components` - Parts to use to build the VM.
    /// * `vm_evt_wrtube` - Tube used by sub-devices to request that crosvm exit because guest
    ///     wants to stop/shut down or requested reset.
    /// * `system_allocator` - Allocator created by this trait's implementation of
    ///   `get_system_allocator_config`.
    /// * `serial_parameters` - Definitions for how the serial devices should be configured.
    /// * `serial_jail` - Jail used for serial devices created here.
    /// * `battery` - Defines what battery device will be created.
    /// * `vm` - A VM implementation to build upon.
    /// * `ramoops_region` - Region allocated for ramoops.
    /// * `devices` - The devices to be built into the VM.
    /// * `irq_chip` - The IRQ chip implementation for the VM.
    /// * `vcpu_ids` - NOTE(review): not documented in this file; presumably the list of VCPU
    ///   ids, which the implementation may update - confirm against the implementations.
    /// * `dump_device_tree_blob` - NOTE(review): presumably an optional path to write the
    ///   generated device tree blob to - confirm against the implementations.
    /// * `debugcon_jail` - Jail used for debugcon devices created here.
    /// * `pflash_jail` - Jail used for pflash device created here (x86/x86_64 only).
    /// * `swap_controller` - Optional swap controller (only with the `swap` feature).
    fn build_vm<V, Vcpu>(
        components: VmComponents,
        vm_evt_wrtube: &SendTube,
        system_allocator: &mut SystemAllocator,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        battery: (Option<BatteryType>, Option<Minijail>),
        vm: V,
        ramoops_region: Option<pstore::RamoopsRegion>,
        devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
        irq_chip: &mut dyn IrqChipArch,
        vcpu_ids: &mut Vec<usize>,
        dump_device_tree_blob: Option<PathBuf>,
        debugcon_jail: Option<Minijail>,
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pflash_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
    where
        V: VmArch,
        Vcpu: VcpuArch;

    /// Configures the vcpu and should be called once per vcpu from the vcpu's thread.
    ///
    /// # Arguments
    ///
    /// * `vm` - The virtual machine object.
    /// * `hypervisor` - The `Hypervisor` that created the vcpu.
    /// * `irq_chip` - The `IrqChip` associated with this vm.
    /// * `vcpu` - The VCPU object to configure.
    /// * `vcpu_init` - The data required to initialize VCPU registers and other state.
    /// * `vcpu_id` - The id of the given `vcpu`.
    /// * `num_cpus` - Number of virtual CPUs the guest will have.
    /// * `has_bios` - Whether the `VmImage` is a `Bios` image.
    /// * `cpu_config` - CPU feature configurations.
    fn configure_vcpu<V: Vm>(
        vm: &V,
        hypervisor: &dyn HypervisorArch,
        irq_chip: &mut dyn IrqChipArch,
        vcpu: &mut dyn VcpuArch,
        vcpu_init: VcpuInitArch,
        vcpu_id: usize,
        num_cpus: usize,
        has_bios: bool,
        cpu_config: Option<CpuConfigArch>,
    ) -> Result<(), Self::Error>;

    /// Configures a PCI device and adds it to the VM, returning the address it was given.
    ///
    /// # Arguments
    ///
    /// * `linux` - The runnable VM the device is added to.
    /// * `device` - The PCI device to register.
    /// * `minijail` - Optional jail for the device (unix only).
    /// * `resources` - Allocator used for the device's resources.
    /// * `hp_control_tube` - Channel used to send `PciRootCommand`s.
    /// * `swap_controller` - Optional swap controller (only with the `swap` feature).
    fn register_pci_device<V: VmArch, Vcpu: VcpuArch>(
        linux: &mut RunnableLinuxVm<V, Vcpu>,
        device: Box<dyn PciDevice>,
        #[cfg(unix)] minijail: Option<Minijail>,
        resources: &mut SystemAllocator,
        hp_control_tube: &mpsc::Sender<PciRootCommand>,
        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
    ) -> Result<PciAddress, Self::Error>;
}
493 
/// Operations a GDB stub needs from a vCPU of type `T`.
///
/// Only available on x86_64 and aarch64 when the `gdb` feature is enabled. Register types come
/// from the `GdbArch` selected for the target architecture.
#[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), feature = "gdb"))]
pub trait GdbOps<T: VcpuArch> {
    /// Error type returned by every operation of this trait.
    type Error: StdError;

    /// Reads vCPU's registers.
    fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers, Self::Error>;

    /// Writes vCPU's registers.
    fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<(), Self::Error>;

    /// Reads `len` bytes from the guest memory starting at `vaddr`.
    fn read_memory(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        len: usize,
    ) -> Result<Vec<u8>, Self::Error>;

    /// Writes the bytes of `buf` to the guest memory starting at `vaddr`.
    fn write_memory(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        buf: &[u8],
    ) -> Result<(), Self::Error>;

    /// Reads the bytes of the guest register identified by `reg_id`.
    fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>, Self::Error>;

    /// Writes the bytes of `data` to the guest register identified by `reg_id`.
    fn write_register(
        vcpu: &T,
        reg_id: <GdbArch as Arch>::RegId,
        data: &[u8],
    ) -> Result<(), Self::Error>;

    /// Make the next vCPU's run single-step.
    fn enable_singlestep(vcpu: &T) -> Result<(), Self::Error>;

    /// Get maximum number of hardware breakpoints.
    fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize, Self::Error>;

    /// Set hardware breakpoints at the given addresses.
    fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<(), Self::Error>;
}
539 
/// Errors for device manager.
///
/// Returned by the device registration helpers in this module, such as `configure_pci_device`.
#[sorted]
#[derive(Error, Debug)]
pub enum DeviceRegistrationError {
    /// No more MMIO space available.
    #[error("no more addresses are available")]
    AddrsExhausted,
    /// Could not allocate device address space for the device.
    #[error("Allocating device addresses: {0}")]
    AllocateDeviceAddrs(PciDeviceError),
    /// Could not allocate IO space for the device.
    #[error("Allocating IO addresses: {0}")]
    AllocateIoAddrs(PciDeviceError),
    /// Could not allocate MMIO or IO resource for the device.
    #[error("Allocating IO resource: {0}")]
    AllocateIoResource(resources::Error),
    /// Could not allocate an IRQ number.
    #[error("Allocating IRQ number")]
    AllocateIrq,
    /// Could not allocate IRQ resource for the device.
    #[cfg(unix)]
    #[error("Allocating IRQ resource: {0}")]
    AllocateIrqResource(devices::vfio::VfioError),
    /// Broken pci topology
    #[error("pci topology is broken")]
    BrokenPciTopology,
    /// Unable to clone a jail for the device.
    #[cfg(unix)]
    #[error("failed to clone jail: {0}")]
    CloneJail(minijail::Error),
    /// Appending to kernel command line failed.
    #[error("unable to add device to kernel command line: {0}")]
    Cmdline(kernel_cmdline::Error),
    /// Configure window size failed.
    #[error("failed to configure window size: {0}")]
    ConfigureWindowSize(PciDeviceError),
    /// Unable to create a pipe.
    #[error("failed to create pipe: {0}")]
    CreatePipe(base::Error),
    /// Unable to create serial device from serial parameters.
    #[error("failed to create serial device: {0}")]
    CreateSerialDevice(devices::SerialError),
    /// Unable to create tube.
    #[error("failed to create tube: {0}")]
    CreateTube(base::TubeError),
    /// Could not clone an event.
    #[error("failed to clone event: {0}")]
    EventClone(base::Error),
    /// Could not create an event.
    #[error("failed to create event: {0}")]
    EventCreate(base::Error),
    /// Failed to generate ACPI content.
    #[error("failed to generate ACPI content")]
    GenerateAcpi,
    /// No more IRQs are available.
    #[error("no more IRQs are available")]
    IrqsExhausted,
    /// Missing a required serial device.
    #[error("missing required serial device {0}")]
    MissingRequiredSerialDevice(u8),
    /// Could not add a device to the mmio bus.
    #[error("failed to add to mmio bus: {0}")]
    MmioInsert(BusError),
    /// Failed to initialize proxy device for jailed device.
    #[cfg(unix)]
    #[error("failed to create proxy device: {0}")]
    ProxyDeviceCreation(devices::ProxyError),
    /// Failed to register battery device.
    #[cfg(unix)]
    #[error("failed to register battery device to VM: {0}")]
    RegisterBattery(devices::BatteryError),
    /// Could not register PCI device to pci root bus
    #[error("failed to register PCI device to pci root bus")]
    RegisterDevice(SendError<PciRootCommand>),
    /// Could not register PCI device capabilities.
    #[error("could not register PCI device capabilities: {0}")]
    RegisterDeviceCapabilities(PciDeviceError),
    /// Failed to register ioevent with VM.
    #[error("failed to register ioevent to VM: {0}")]
    RegisterIoevent(base::Error),
    /// Failed to register irq event with VM.
    #[error("failed to register irq event to VM: {0}")]
    RegisterIrqfd(base::Error),
    /// Could not setup VFIO platform IRQ for the device.
    #[error("Setting up VFIO platform IRQ: {0}")]
    SetupVfioPlatformIrq(anyhow::Error),
}
627 
/// Configures a PCI device for use by this VM.
///
/// Allocates the device's PCI address and BARs, wires up a fixed INTx interrupt if the device
/// asks for one, registers its capabilities and ioevents, optionally moves it into a jailed
/// proxy process, and finally inserts it into the MMIO bus.
///
/// # Arguments
///
/// * `linux` - The VM the device is added to; its IRQ chip, `vm`, MMIO bus and
///   `pid_debug_label_map` are used here.
/// * `device` - The PCI device to configure.
/// * `jail` - Optional jail; when present the device is wrapped in a `ProxyDevice` (unix only).
/// * `resources` - Allocator used for the PCI address, BARs and IRQ reservation.
/// * `hp_control_tube` - Channel used to send `PciRootCommand`s for the new bridge/device.
/// * `swap_controller` - Forwarded to `ProxyDevice::new` (only with the `swap` feature).
///
/// # Errors
///
/// Returns the `DeviceRegistrationError` variant corresponding to the step that failed.
pub fn configure_pci_device<V: VmArch, Vcpu: VcpuArch>(
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    mut device: Box<dyn PciDevice>,
    #[cfg(unix)] jail: Option<Minijail>,
    resources: &mut SystemAllocator,
    hp_control_tube: &mpsc::Sender<PciRootCommand>,
    #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
) -> Result<PciAddress, DeviceRegistrationError> {
    // Allocate PCI device address before allocating BARs.
    let pci_address = device
        .allocate_address(resources)
        .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;

    // Allocate ranges that may need to be in the low MMIO region (MmioType::Low).
    let mmio_ranges = device
        .allocate_io_bars(resources)
        .map_err(DeviceRegistrationError::AllocateIoAddrs)?;

    // Allocate device ranges that may be in low or high MMIO after low-only ranges.
    let device_ranges = device
        .allocate_device_bars(resources)
        .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;

    // If device is a pcie bridge, add its pci bus to pci root
    if let Some(pci_bus) = device.get_new_pci_bus() {
        hp_control_tube
            .send(PciRootCommand::AddBridge(pci_bus))
            .map_err(DeviceRegistrationError::RegisterDevice)?;
        // The bridge window is configured with an empty list of BAR ranges.
        let bar_ranges = Vec::new();
        device
            .configure_bridge_window(resources, &bar_ranges)
            .map_err(DeviceRegistrationError::ConfigureWindowSize)?;
    }

    // The INTx event is always created, but it is only wired up when the device reports a fixed
    // pin/GSI preference; no IRQ is allocated for the device otherwise.
    let intx_event = devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;

    if let PreferredIrq::Fixed { pin, gsi } = device.preferred_irq() {
        resources.reserve_irq(gsi);

        // The device gets its own clone of the event; the original is registered with the chip.
        device.assign_irq(
            intx_event
                .try_clone()
                .map_err(DeviceRegistrationError::EventClone)?,
            pin,
            gsi,
        );

        linux
            .irq_chip
            .as_irq_chip_mut()
            .register_level_irq_event(gsi, &intx_event, IrqEventSource::from_device(&device))
            .map_err(DeviceRegistrationError::RegisterIrqfd)?;
    }

    // Descriptors handed to `ProxyDevice::new` for a jailed device: the device's own plus the
    // syslog and tracing descriptors.
    let mut keep_rds = device.keep_rds();
    syslog::push_descriptors(&mut keep_rds);
    cros_tracing::push_descriptors!(&mut keep_rds);

    device
        .register_device_capabilities()
        .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
    // Register every ioevent of the device with the VM as an MMIO address.
    for (event, addr, datamatch) in device.ioevents() {
        let io_addr = IoEventAddress::Mmio(addr);
        linux
            .vm
            .register_ioevent(event, io_addr, datamatch)
            .map_err(DeviceRegistrationError::RegisterIoevent)?;
        keep_rds.push(event.as_raw_descriptor());
    }

    // With a jail, the device is wrapped in a proxy whose pid and debug label are recorded;
    // without one, the device is used directly after `on_sandboxed()` is called.
    #[cfg(unix)]
    let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
        let proxy = ProxyDevice::new(
            device,
            jail,
            keep_rds,
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
        linux
            .pid_debug_label_map
            .insert(proxy.pid() as u32, proxy.debug_label());
        Arc::new(Mutex::new(proxy))
    } else {
        device.on_sandboxed();
        Arc::new(Mutex::new(device))
    };

    // There is no jail parameter on Windows, so the device is always used directly.
    #[cfg(windows)]
    let arced_dev = {
        device.on_sandboxed();
        Arc::new(Mutex::new(device))
    };

    // NOTE(review): the device is only sent to the PCI root on unix - confirm this is intended
    // for Windows builds.
    #[cfg(unix)]
    hp_control_tube
        .send(PciRootCommand::Add(pci_address, arced_dev.clone()))
        .map_err(DeviceRegistrationError::RegisterDevice)?;

    // Insert every allocated BAR range into the MMIO bus: IO BARs first, then device BARs.
    for range in &mmio_ranges {
        linux
            .mmio_bus
            .insert(arced_dev.clone(), range.addr, range.size)
            .map_err(DeviceRegistrationError::MmioInsert)?;
    }

    for range in &device_ranges {
        linux
            .mmio_bus
            .insert(arced_dev.clone(), range.addr, range.size)
            .map_err(DeviceRegistrationError::MmioInsert)?;
    }

    Ok(pci_address)
}
746 
747 /// Creates Virtio MMIO devices for use by this Vm.
generate_virtio_mmio_bus( devices: Vec<(VirtioMmioDevice, Option<Minijail>)>, irq_chip: &mut dyn IrqChip, mmio_bus: &Bus, resources: &mut SystemAllocator, vm: &mut impl Vm, sdts: Vec<SDT>, #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>, ) -> Result<(BTreeMap<u32, String>, Vec<SDT>), DeviceRegistrationError>748 pub fn generate_virtio_mmio_bus(
749     devices: Vec<(VirtioMmioDevice, Option<Minijail>)>,
750     irq_chip: &mut dyn IrqChip,
751     mmio_bus: &Bus,
752     resources: &mut SystemAllocator,
753     vm: &mut impl Vm,
754     sdts: Vec<SDT>,
755     #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
756 ) -> Result<(BTreeMap<u32, String>, Vec<SDT>), DeviceRegistrationError> {
757     #[cfg_attr(windows, allow(unused_mut))]
758     let mut pid_labels = BTreeMap::new();
759 
760     // sdts can be updated only on x86 platforms.
761     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
762     let mut sdts = sdts;
763     for dev_value in devices.into_iter() {
764         #[cfg(unix)]
765         let (mut device, jail) = dev_value;
766         #[cfg(windows)]
767         let (mut device, _) = dev_value;
768 
769         let ranges = device
770             .allocate_regions(resources)
771             .map_err(DeviceRegistrationError::AllocateIoResource)?;
772 
773         let mut keep_rds = device.keep_rds();
774         syslog::push_descriptors(&mut keep_rds);
775         cros_tracing::push_descriptors!(&mut keep_rds);
776 
777         let irq_num = resources
778             .allocate_irq()
779             .ok_or(DeviceRegistrationError::AllocateIrq)?;
780         let irq_evt = devices::IrqEdgeEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
781         irq_chip
782             .register_edge_irq_event(irq_num, &irq_evt, IrqEventSource::from_device(&device))
783             .map_err(DeviceRegistrationError::RegisterIrqfd)?;
784         device.assign_irq(&irq_evt, irq_num);
785         keep_rds.extend(irq_evt.as_raw_descriptors());
786 
787         for (event, addr, datamatch) in device.ioevents() {
788             let io_addr = IoEventAddress::Mmio(addr);
789             vm.register_ioevent(event, io_addr, datamatch)
790                 .map_err(DeviceRegistrationError::RegisterIoevent)?;
791             keep_rds.push(event.as_raw_descriptor());
792         }
793 
794         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
795         {
796             sdts = device
797                 .generate_acpi(sdts)
798                 .ok_or(DeviceRegistrationError::GenerateAcpi)?;
799         }
800 
801         #[cfg(unix)]
802         let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
803             let proxy = ProxyDevice::new(
804                 device,
805                 jail,
806                 keep_rds,
807                 #[cfg(feature = "swap")]
808                 swap_controller,
809             )
810             .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
811             pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
812             Arc::new(Mutex::new(proxy))
813         } else {
814             device.on_sandboxed();
815             Arc::new(Mutex::new(device))
816         };
817 
818         #[cfg(windows)]
819         let arced_dev = {
820             device.on_sandboxed();
821             Arc::new(Mutex::new(device))
822         };
823 
824         for range in &ranges {
825             mmio_bus
826                 .insert(arced_dev.clone(), range.0, range.1)
827                 .map_err(DeviceRegistrationError::MmioInsert)?;
828         }
829     }
830     Ok((pid_labels, sdts))
831 }
832 
833 // Generate pci topology starting from parent bus
generate_pci_topology( parent_bus: Arc<Mutex<PciBus>>, resources: &mut SystemAllocator, io_ranges: &mut BTreeMap<usize, Vec<BarRange>>, device_ranges: &mut BTreeMap<usize, Vec<BarRange>>, device_addrs: &[PciAddress], devices: &mut Vec<(Box<dyn PciDevice>, Option<Minijail>)>, ) -> Result<(Vec<BarRange>, u8), DeviceRegistrationError>834 pub fn generate_pci_topology(
835     parent_bus: Arc<Mutex<PciBus>>,
836     resources: &mut SystemAllocator,
837     io_ranges: &mut BTreeMap<usize, Vec<BarRange>>,
838     device_ranges: &mut BTreeMap<usize, Vec<BarRange>>,
839     device_addrs: &[PciAddress],
840     devices: &mut Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
841 ) -> Result<(Vec<BarRange>, u8), DeviceRegistrationError> {
842     let mut bar_ranges = Vec::new();
843     let bus_num = parent_bus.lock().get_bus_num();
844     let mut subordinate_bus = bus_num;
845     for (dev_idx, addr) in device_addrs.iter().enumerate() {
846         // Only target for devices that located on this bus
847         if addr.bus == bus_num {
848             // If this device is a pci bridge (a.k.a., it has a pci bus structure),
849             // create its topology recursively
850             if let Some(child_bus) = devices[dev_idx].0.get_new_pci_bus() {
851                 let (child_bar_ranges, child_sub_bus) = generate_pci_topology(
852                     child_bus.clone(),
853                     resources,
854                     io_ranges,
855                     device_ranges,
856                     device_addrs,
857                     devices,
858                 )?;
859                 let device = &mut devices[dev_idx].0;
860                 parent_bus
861                     .lock()
862                     .add_child_bus(child_bus.clone())
863                     .map_err(|_| DeviceRegistrationError::BrokenPciTopology)?;
864                 let bridge_window = device
865                     .configure_bridge_window(resources, &child_bar_ranges)
866                     .map_err(DeviceRegistrationError::ConfigureWindowSize)?;
867                 bar_ranges.extend(bridge_window);
868 
869                 let ranges = device
870                     .allocate_io_bars(resources)
871                     .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
872                 io_ranges.insert(dev_idx, ranges.clone());
873                 bar_ranges.extend(ranges);
874 
875                 let ranges = device
876                     .allocate_device_bars(resources)
877                     .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
878                 device_ranges.insert(dev_idx, ranges.clone());
879                 bar_ranges.extend(ranges);
880 
881                 device.set_subordinate_bus(child_sub_bus);
882 
883                 subordinate_bus = std::cmp::max(subordinate_bus, child_sub_bus);
884             }
885         }
886     }
887 
888     for (dev_idx, addr) in device_addrs.iter().enumerate() {
889         if addr.bus == bus_num {
890             let device = &mut devices[dev_idx].0;
891             // Allocate MMIO for non-bridge devices
892             if device.get_new_pci_bus().is_none() {
893                 let ranges = device
894                     .allocate_io_bars(resources)
895                     .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
896                 io_ranges.insert(dev_idx, ranges.clone());
897                 bar_ranges.extend(ranges);
898 
899                 let ranges = device
900                     .allocate_device_bars(resources)
901                     .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
902                 device_ranges.insert(dev_idx, ranges.clone());
903                 bar_ranges.extend(ranges);
904             }
905         }
906     }
907     Ok((bar_ranges, subordinate_bus))
908 }
909 
910 /// Ensure all PCI devices have an assigned PCI address.
assign_pci_addresses( devices: &mut [(Box<dyn BusDeviceObj>, Option<Minijail>)], resources: &mut SystemAllocator, ) -> Result<(), DeviceRegistrationError>911 pub fn assign_pci_addresses(
912     devices: &mut [(Box<dyn BusDeviceObj>, Option<Minijail>)],
913     resources: &mut SystemAllocator,
914 ) -> Result<(), DeviceRegistrationError> {
915     // First allocate devices with a preferred address.
916     for pci_device in devices
917         .iter_mut()
918         .filter_map(|(device, _jail)| device.as_pci_device_mut())
919         .filter(|pci_device| pci_device.preferred_address().is_some())
920     {
921         let _ = pci_device
922             .allocate_address(resources)
923             .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
924     }
925 
926     // Then allocate addresses for the remaining devices.
927     for pci_device in devices
928         .iter_mut()
929         .filter_map(|(device, _jail)| device.as_pci_device_mut())
930         .filter(|pci_device| pci_device.preferred_address().is_none())
931     {
932         let _ = pci_device
933             .allocate_address(resources)
934             .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
935     }
936 
937     Ok(())
938 }
939 
940 /// Creates a root PCI device for use by this Vm.
generate_pci_root( mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>, irq_chip: &mut dyn IrqChip, mmio_bus: Arc<Bus>, io_bus: Arc<Bus>, resources: &mut SystemAllocator, vm: &mut impl Vm, max_irqs: usize, vcfg_base: Option<u64>, #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>, ) -> Result< ( PciRoot, Vec<(PciAddress, u32, PciInterruptPin)>, BTreeMap<u32, String>, BTreeMap<PciAddress, Vec<u8>>, ), DeviceRegistrationError, >941 pub fn generate_pci_root(
942     mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
943     irq_chip: &mut dyn IrqChip,
944     mmio_bus: Arc<Bus>,
945     io_bus: Arc<Bus>,
946     resources: &mut SystemAllocator,
947     vm: &mut impl Vm,
948     max_irqs: usize,
949     vcfg_base: Option<u64>,
950     #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
951 ) -> Result<
952     (
953         PciRoot,
954         Vec<(PciAddress, u32, PciInterruptPin)>,
955         BTreeMap<u32, String>,
956         BTreeMap<PciAddress, Vec<u8>>,
957     ),
958     DeviceRegistrationError,
959 > {
960     let mut device_addrs = Vec::new();
961 
962     for (device, _jail) in devices.iter_mut() {
963         let address = device
964             .allocate_address(resources)
965             .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
966         device_addrs.push(address);
967     }
968 
969     let mut device_ranges = BTreeMap::new();
970     let mut io_ranges = BTreeMap::new();
971     let root_bus = Arc::new(Mutex::new(PciBus::new(0, 0, false)));
972 
973     generate_pci_topology(
974         root_bus.clone(),
975         resources,
976         &mut io_ranges,
977         &mut device_ranges,
978         &device_addrs,
979         &mut devices,
980     )?;
981 
982     let mut root = PciRoot::new(Arc::downgrade(&mmio_bus), Arc::downgrade(&io_bus), root_bus);
983     #[cfg_attr(windows, allow(unused_mut))]
984     let mut pid_labels = BTreeMap::new();
985 
986     // Allocate legacy INTx
987     let mut pci_irqs = Vec::new();
988     let mut irqs: Vec<u32> = Vec::new();
989 
990     // Mapping of (bus, dev, pin) -> IRQ number.
991     let mut dev_pin_irq = BTreeMap::new();
992 
993     for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
994         let pci_address = device_addrs[dev_idx];
995 
996         let irq = match device.preferred_irq() {
997             PreferredIrq::Fixed { pin, gsi } => {
998                 // The device reported a preferred IRQ, so use that rather than allocating one.
999                 resources.reserve_irq(gsi);
1000                 Some((pin, gsi))
1001             }
1002             PreferredIrq::Any => {
1003                 // The device did not provide a preferred IRQ but requested one, so allocate one.
1004 
1005                 // Choose a pin based on the slot's function number. Function 0 must always use
1006                 // INTA# for single-function devices per the PCI spec, and we choose to use INTA#
1007                 // for function 0 on multifunction devices and distribute the remaining functions
1008                 // evenly across the other pins.
1009                 let pin = match pci_address.func % 4 {
1010                     0 => PciInterruptPin::IntA,
1011                     1 => PciInterruptPin::IntB,
1012                     2 => PciInterruptPin::IntC,
1013                     _ => PciInterruptPin::IntD,
1014                 };
1015 
1016                 // If an IRQ number has already been assigned for a different function with this
1017                 // (bus, device, pin) combination, use it. Otherwise allocate a new one and insert
1018                 // it into the map.
1019                 let pin_key = (pci_address.bus, pci_address.dev, pin);
1020                 let irq_num = if let Some(irq_num) = dev_pin_irq.get(&pin_key) {
1021                     *irq_num
1022                 } else {
1023                     // If we have allocated fewer than `max_irqs` total, add a new irq to the `irqs`
1024                     // pool. Otherwise, share one of the existing `irqs`.
1025                     let irq_num = if irqs.len() < max_irqs {
1026                         let irq_num = resources
1027                             .allocate_irq()
1028                             .ok_or(DeviceRegistrationError::AllocateIrq)?;
1029                         irqs.push(irq_num);
1030                         irq_num
1031                     } else {
1032                         // Pick one of the existing IRQs to share, using `dev_idx` to distribute IRQ
1033                         // sharing evenly across devices.
1034                         irqs[dev_idx % max_irqs]
1035                     };
1036 
1037                     dev_pin_irq.insert(pin_key, irq_num);
1038                     irq_num
1039                 };
1040                 Some((pin, irq_num))
1041             }
1042             PreferredIrq::None => {
1043                 // The device does not want an INTx# IRQ.
1044                 None
1045             }
1046         };
1047 
1048         if let Some((pin, gsi)) = irq {
1049             let intx_event =
1050                 devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
1051 
1052             device.assign_irq(
1053                 intx_event
1054                     .try_clone()
1055                     .map_err(DeviceRegistrationError::EventClone)?,
1056                 pin,
1057                 gsi,
1058             );
1059 
1060             irq_chip
1061                 .register_level_irq_event(gsi, &intx_event, IrqEventSource::from_device(device))
1062                 .map_err(DeviceRegistrationError::RegisterIrqfd)?;
1063 
1064             pci_irqs.push((pci_address, gsi, pin));
1065         }
1066     }
1067 
1068     // To prevent issues where device's on_sandbox may spawn thread before all
1069     // sandboxed devices are sandboxed we partition iterator to go over sandboxed
1070     // first. This is needed on linux platforms. On windows, this is a no-op since
1071     // jails are always None, even for sandboxed devices.
1072     let devices = {
1073         let (sandboxed, non_sandboxed): (Vec<_>, Vec<_>) = devices
1074             .into_iter()
1075             .enumerate()
1076             .partition(|(_, (_, jail))| jail.is_some());
1077         sandboxed.into_iter().chain(non_sandboxed.into_iter())
1078     };
1079 
1080     let mut amls = BTreeMap::new();
1081     for (dev_idx, dev_value) in devices {
1082         #[cfg(unix)]
1083         let (mut device, jail) = dev_value;
1084         #[cfg(windows)]
1085         let (mut device, _) = dev_value;
1086         let address = device_addrs[dev_idx];
1087 
1088         let mut keep_rds = device.keep_rds();
1089         syslog::push_descriptors(&mut keep_rds);
1090         cros_tracing::push_descriptors!(&mut keep_rds);
1091         keep_rds.append(&mut vm.get_memory().as_raw_descriptors());
1092 
1093         let ranges = io_ranges.remove(&dev_idx).unwrap_or_default();
1094         let device_ranges = device_ranges.remove(&dev_idx).unwrap_or_default();
1095         device
1096             .register_device_capabilities()
1097             .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
1098         for (event, addr, datamatch) in device.ioevents() {
1099             let io_addr = IoEventAddress::Mmio(addr);
1100             vm.register_ioevent(event, io_addr, datamatch)
1101                 .map_err(DeviceRegistrationError::RegisterIoevent)?;
1102             keep_rds.push(event.as_raw_descriptor());
1103         }
1104 
1105         if let Some(vcfg_base) = vcfg_base {
1106             let (methods, shm) = device.generate_acpi_methods();
1107             if !methods.is_empty() {
1108                 amls.insert(address, methods);
1109             }
1110             if let Some((offset, mmap)) = shm {
1111                 let _ = vm.add_memory_region(
1112                     GuestAddress(vcfg_base + offset as u64),
1113                     Box::new(mmap),
1114                     false,
1115                     false,
1116                 );
1117             }
1118         }
1119 
1120         #[cfg(unix)]
1121         let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
1122             let proxy = ProxyDevice::new(
1123                 device,
1124                 jail,
1125                 keep_rds,
1126                 #[cfg(feature = "swap")]
1127                 swap_controller,
1128             )
1129             .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
1130             pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
1131             Arc::new(Mutex::new(proxy))
1132         } else {
1133             device.on_sandboxed();
1134             Arc::new(Mutex::new(device))
1135         };
1136         #[cfg(windows)]
1137         let arced_dev = {
1138             device.on_sandboxed();
1139             Arc::new(Mutex::new(device))
1140         };
1141         root.add_device(address, arced_dev.clone());
1142         for range in &ranges {
1143             mmio_bus
1144                 .insert(arced_dev.clone(), range.addr, range.size)
1145                 .map_err(DeviceRegistrationError::MmioInsert)?;
1146         }
1147 
1148         for range in &device_ranges {
1149             mmio_bus
1150                 .insert(arced_dev.clone(), range.addr, range.size)
1151                 .map_err(DeviceRegistrationError::MmioInsert)?;
1152         }
1153     }
1154 
1155     Ok((root, pci_irqs, pid_labels, amls))
1156 }
1157 
/// Errors for image loading.
#[sorted]
#[derive(Error, Debug)]
pub enum LoadImageError {
    /// The requested alignment (carried in the variant) is not a power of two.
    #[error("Alignment not a power of two: {0}")]
    BadAlignment(u64),
    /// The image size in bytes (carried in the variant) does not fit in `usize` or exceeds the
    /// space available for it.
    #[error("Image size too large: {0}")]
    ImageSizeTooLarge(u64),
    /// Copying the image contents into guest memory failed.
    #[error("Reading image into memory failed: {0}")]
    ReadToMemory(GuestMemoryError),
    /// Seeking within the image failed.
    #[error("Seek failed: {0}")]
    Seek(io::Error),
}
1171 
1172 /// Load an image from a file into guest memory.
1173 ///
1174 /// # Arguments
1175 ///
1176 /// * `guest_mem` - The memory to be used by the guest.
1177 /// * `guest_addr` - The starting address to load the image in the guest memory.
1178 /// * `max_size` - The amount of space in bytes available in the guest memory for the image.
1179 /// * `image` - The file containing the image to be loaded.
1180 ///
1181 /// The size in bytes of the loaded image is returned.
load_image<F>( guest_mem: &GuestMemory, image: &mut F, guest_addr: GuestAddress, max_size: u64, ) -> Result<usize, LoadImageError> where F: Read + Seek + AsRawDescriptor,1182 pub fn load_image<F>(
1183     guest_mem: &GuestMemory,
1184     image: &mut F,
1185     guest_addr: GuestAddress,
1186     max_size: u64,
1187 ) -> Result<usize, LoadImageError>
1188 where
1189     F: Read + Seek + AsRawDescriptor,
1190 {
1191     let size = image.seek(SeekFrom::End(0)).map_err(LoadImageError::Seek)?;
1192 
1193     if size > usize::max_value() as u64 || size > max_size {
1194         return Err(LoadImageError::ImageSizeTooLarge(size));
1195     }
1196 
1197     // This is safe due to the bounds check above.
1198     let size = size as usize;
1199 
1200     image
1201         .seek(SeekFrom::Start(0))
1202         .map_err(LoadImageError::Seek)?;
1203 
1204     guest_mem
1205         .read_to_memory(guest_addr, image, size)
1206         .map_err(LoadImageError::ReadToMemory)?;
1207 
1208     Ok(size)
1209 }
1210 
1211 /// Load an image from a file into guest memory at the highest possible address.
1212 ///
1213 /// # Arguments
1214 ///
1215 /// * `guest_mem` - The memory to be used by the guest.
1216 /// * `image` - The file containing the image to be loaded.
1217 /// * `min_guest_addr` - The minimum address of the start of the image.
1218 /// * `max_guest_addr` - The address to load the last byte of the image.
1219 /// * `align` - The minimum alignment of the start address of the image in bytes
1220 ///   (must be a power of two).
1221 ///
1222 /// The guest address and size in bytes of the loaded image are returned.
load_image_high<F>( guest_mem: &GuestMemory, image: &mut F, min_guest_addr: GuestAddress, max_guest_addr: GuestAddress, align: u64, ) -> Result<(GuestAddress, usize), LoadImageError> where F: Read + Seek + AsRawDescriptor,1223 pub fn load_image_high<F>(
1224     guest_mem: &GuestMemory,
1225     image: &mut F,
1226     min_guest_addr: GuestAddress,
1227     max_guest_addr: GuestAddress,
1228     align: u64,
1229 ) -> Result<(GuestAddress, usize), LoadImageError>
1230 where
1231     F: Read + Seek + AsRawDescriptor,
1232 {
1233     if !align.is_power_of_two() {
1234         return Err(LoadImageError::BadAlignment(align));
1235     }
1236 
1237     let max_size = max_guest_addr.offset_from(min_guest_addr) & !(align - 1);
1238     let size = image.seek(SeekFrom::End(0)).map_err(LoadImageError::Seek)?;
1239 
1240     if size > usize::max_value() as u64 || size > max_size {
1241         return Err(LoadImageError::ImageSizeTooLarge(size));
1242     }
1243 
1244     image
1245         .seek(SeekFrom::Start(0))
1246         .map_err(LoadImageError::Seek)?;
1247 
1248     // Load image at the maximum aligned address allowed.
1249     // The subtraction cannot underflow because of the size checks above.
1250     let guest_addr = GuestAddress((max_guest_addr.offset() - size) & !(align - 1));
1251 
1252     // This is safe due to the bounds check above.
1253     let size = size as usize;
1254 
1255     guest_mem
1256         .read_to_memory(guest_addr, image, size)
1257         .map_err(LoadImageError::ReadToMemory)?;
1258 
1259     Ok((guest_addr, size))
1260 }
1261 
/// Read and write permissions setting
///
/// Wrap read_allow and write_allow to store them in MsrHandlers level.
#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub enum MsrRWType {
    /// Read access only; (de)serialized as `r`.
    #[serde(rename = "r")]
    ReadOnly,
    /// Write access only; (de)serialized as `w`.
    #[serde(rename = "w")]
    WriteOnly,
    /// Both read and write access; serialized as `rw`, with `wr` also accepted on input.
    #[serde(rename = "rw", alias = "wr")]
    ReadWrite,
}
1274 
/// Handler types for userspace-msr
#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub enum MsrAction {
    /// Read from and write to the host directly, so control of the MSR
    /// takes effect on the host. (De)serialized as `pass`.
    #[serde(rename = "pass")]
    MsrPassthrough,
    /// Store a dummy value for the MSR (copied from the host or a custom value);
    /// control (WRMSR) of the MSR does not take effect on the host.
    /// (De)serialized as `emu`.
    #[serde(rename = "emu")]
    MsrEmulate,
}
1287 
/// Source CPU of MSR value
///
/// Indicates which CPU the user gets MSRs from or sets MSRs to.
#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub enum MsrValueFrom {
    /// Read/write MSR value from/into CPU 0.
    /// The MSR source CPU is always CPU 0.
    /// (De)serialized as `cpu0`.
    #[serde(rename = "cpu0")]
    RWFromCPU0,
    /// Read/write MSR value from/into the running CPU.
    /// If the vCPU migrates to another pCPU, the MSR source CPU also changes.
    /// This variant is skipped by serde, so it is never serialized or deserialized.
    #[serde(skip)]
    RWFromRunningCPU,
}
1302 
/// Whether to force KVM-filtered MSRs.
#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub enum MsrFilter {
    /// Leave it to hypervisor (KVM) default.
    /// (De)serialized as `no`.
    #[serde(rename = "no")]
    Default,
    /// Don't let KVM do the default thing and use our userspace MSR
    /// implementation.
    /// (De)serialized as `yes`.
    #[serde(rename = "yes")]
    Override,
}
1314 
/// Config option for userspace-msr handling
///
/// MsrConfig will be collected with its corresponding MSR's index,
/// e.g. (msr_index, msr_config).
#[derive(Clone, Serialize, Deserialize)]
pub struct MsrConfig {
    /// Whether RDMSR and/or WRMSR emulation is supported in crosvm.
    pub rw_type: MsrRWType,
    /// Which handler should be used to handle the MSR.
    pub action: MsrAction,
    /// MSR source CPU.
    pub from: MsrValueFrom,
    /// Whether to override KVM MSR emulation.
    pub filter: MsrFilter,
}
1330 
/// Errors related to MSR exit handlers.
#[sorted]
#[derive(Error, Debug)]
pub enum MsrExitHandlerError {
    /// An MSR handler could not be created.
    #[error("Fail to create MSR handler")]
    HandlerCreateFailed,
}
1337 
#[cfg(test)]
mod tests {
    use serde_keyvalue::from_key_values;

    use super::*;

    /// `Pstore` parses from key-value form only when both `path` and `size` are given.
    #[test]
    fn parse_pstore() {
        let expected = Pstore {
            path: "/some/path".into(),
            size: 16384,
        };
        let parsed: Pstore = from_key_values("path=/some/path,size=16384").unwrap();
        assert_eq!(parsed, expected);

        // Leaving out either key (or both) must be rejected.
        for &incomplete in &["path=/some/path", "size=16384", ""] {
            assert!(from_key_values::<Pstore>(incomplete).is_err());
        }
    }

    /// `CpuSet` parses from key-value form, including `a-b` ranges.
    #[test]
    fn deserialize_cpuset_serde_kv() {
        let cases = [
            ("[0,4,7]", CpuSet::new(vec![0, 4, 7])),
            ("[9-12]", CpuSet::new(vec![9, 10, 11, 12])),
            (
                "[0,4,7,9-12,15]",
                CpuSet::new(vec![0, 4, 7, 9, 10, 11, 12, 15]),
            ),
        ];

        for (input, expected) in &cases {
            let parsed: CpuSet = from_key_values(input).unwrap();
            assert_eq!(&parsed, expected);
        }
    }

    /// `CpuSet` round-trips through JSON, with ranges encoded as `"a-b"` strings.
    #[test]
    fn deserialize_serialize_cpuset_json() {
        let cases = [
            ("[0,4,7]", CpuSet::new(vec![0, 4, 7])),
            (r#"["9-12"]"#, CpuSet::new(vec![9, 10, 11, 12])),
            (
                r#"[0,4,7,"9-12",15]"#,
                CpuSet::new(vec![0, 4, 7, 9, 10, 11, 12, 15]),
            ),
        ];

        for (json_str, cpuset) in &cases {
            let parsed: CpuSet = serde_json::from_str(json_str).unwrap();
            assert_eq!(&parsed, cpuset);
            assert_eq!(serde_json::to_string(cpuset).unwrap(), *json_str);
        }
    }
}
1398