• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! Virtual machine architecture support code.
6 
7 pub mod android;
8 pub mod fdt;
9 pub mod pstore;
10 pub mod serial;
11 
12 pub mod sys;
13 
14 use std::collections::BTreeMap;
15 use std::error::Error as StdError;
16 use std::fs::File;
17 use std::io;
18 use std::ops::Deref;
19 use std::path::PathBuf;
20 use std::str::FromStr;
21 use std::sync::mpsc;
22 use std::sync::mpsc::SendError;
23 use std::sync::Arc;
24 
25 use acpi_tables::sdt::SDT;
26 use base::syslog;
27 use base::AsRawDescriptor;
28 use base::AsRawDescriptors;
29 use base::Event;
30 use base::FileGetLen;
31 use base::FileReadWriteAtVolatile;
32 use base::SendTube;
33 use base::Tube;
34 use devices::virtio::VirtioDevice;
35 use devices::BarRange;
36 use devices::Bus;
37 use devices::BusDevice;
38 use devices::BusDeviceObj;
39 use devices::BusError;
40 use devices::BusResumeDevice;
41 use devices::FwCfgParameters;
42 use devices::GpeScope;
43 use devices::HotPlugBus;
44 use devices::IrqChip;
45 use devices::IrqEventSource;
46 use devices::PciAddress;
47 use devices::PciBus;
48 use devices::PciDevice;
49 use devices::PciDeviceError;
50 use devices::PciInterruptPin;
51 use devices::PciRoot;
52 use devices::PciRootCommand;
53 use devices::PreferredIrq;
54 #[cfg(any(target_os = "android", target_os = "linux"))]
55 use devices::ProxyDevice;
56 use devices::SerialHardware;
57 use devices::SerialParameters;
58 use devices::VirtioMmioDevice;
59 pub use fdt::apply_device_tree_overlays;
60 pub use fdt::DtbOverlay;
61 #[cfg(feature = "gdb")]
62 use gdbstub::arch::Arch;
63 use hypervisor::IoEventAddress;
64 use hypervisor::MemCacheType;
65 use hypervisor::Vm;
66 #[cfg(windows)]
67 use jail::FakeMinijailStub as Minijail;
68 #[cfg(any(target_os = "android", target_os = "linux"))]
69 use minijail::Minijail;
70 use remain::sorted;
71 #[cfg(target_arch = "x86_64")]
72 use resources::AddressRange;
73 use resources::SystemAllocator;
74 use resources::SystemAllocatorConfig;
75 use serde::de::Visitor;
76 use serde::Deserialize;
77 use serde::Serialize;
78 use serde_keyvalue::FromKeyValues;
79 pub use serial::add_serial_devices;
80 pub use serial::get_serial_cmdline;
81 pub use serial::set_default_serial_parameters;
82 pub use serial::GetSerialCmdlineError;
83 pub use serial::SERIAL_ADDR;
84 #[cfg(any(target_os = "android", target_os = "linux"))]
85 use sync::Condvar;
86 use sync::Mutex;
87 #[cfg(any(target_os = "android", target_os = "linux"))]
88 pub use sys::linux::PlatformBusResources;
89 use thiserror::Error;
90 use uuid::Uuid;
91 use vm_control::BatControl;
92 use vm_control::BatteryType;
93 use vm_control::PmResource;
94 use vm_memory::GuestAddress;
95 use vm_memory::GuestMemory;
96 use vm_memory::GuestMemoryError;
97 use vm_memory::MemoryRegionOptions;
98 
99 cfg_if::cfg_if! {
100     if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] {
101         pub use devices::IrqChipAArch64 as IrqChipArch;
102         #[cfg(feature = "gdb")]
103         pub use gdbstub_arch::aarch64::AArch64 as GdbArch;
104         pub use hypervisor::CpuConfigAArch64 as CpuConfigArch;
105         pub use hypervisor::Hypervisor as HypervisorArch;
106         pub use hypervisor::VcpuAArch64 as VcpuArch;
107         pub use hypervisor::VcpuInitAArch64 as VcpuInitArch;
108         pub use hypervisor::VmAArch64 as VmArch;
109     } else if #[cfg(target_arch = "riscv64")] {
110         pub use devices::IrqChipRiscv64 as IrqChipArch;
111         #[cfg(feature = "gdb")]
112         pub use gdbstub_arch::riscv::Riscv64 as GdbArch;
113         pub use hypervisor::CpuConfigRiscv64 as CpuConfigArch;
114         pub use hypervisor::Hypervisor as HypervisorArch;
115         pub use hypervisor::VcpuInitRiscv64 as VcpuInitArch;
116         pub use hypervisor::VcpuRiscv64 as VcpuArch;
117         pub use hypervisor::VmRiscv64 as VmArch;
118     } else if #[cfg(target_arch = "x86_64")] {
119         pub use devices::IrqChipX86_64 as IrqChipArch;
120         #[cfg(feature = "gdb")]
121         pub use gdbstub_arch::x86::X86_64_SSE as GdbArch;
122         pub use hypervisor::CpuConfigX86_64 as CpuConfigArch;
123         pub use hypervisor::HypervisorX86_64 as HypervisorArch;
124         pub use hypervisor::VcpuInitX86_64 as VcpuInitArch;
125         pub use hypervisor::VcpuX86_64 as VcpuArch;
126         pub use hypervisor::VmX86_64 as VmArch;
127     }
128 }
129 
/// Image file supplied for a VM, tagged by the kind of image it contains.
pub enum VmImage {
    /// An open kernel image file.
    Kernel(File),
    /// An open BIOS image file.
    Bios(File),
}
134 
135 #[derive(Clone, Debug, Deserialize, Serialize, FromKeyValues, PartialEq, Eq)]
136 #[serde(deny_unknown_fields, rename_all = "kebab-case")]
137 pub struct Pstore {
138     pub path: PathBuf,
139     pub size: u32,
140 }
141 
/// Set of CPU cores.
///
/// Thin wrapper around a `Vec<usize>` of CPU indices. The indices are stored exactly as they
/// were supplied: the constructor neither sorts nor de-duplicates them.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct CpuSet(Vec<usize>);

impl CpuSet {
    /// Builds a `CpuSet` from any iterable of CPU indices, preserving iteration order.
    pub fn new<I: IntoIterator<Item = usize>>(cpus: I) -> Self {
        CpuSet(cpus.into_iter().collect())
    }

    /// Returns a borrowing iterator over the CPU indices, in stored order.
    pub fn iter(&self) -> std::slice::Iter<'_, usize> {
        self.0.iter()
    }
}
155 
156 impl FromIterator<usize> for CpuSet {
from_iter<T>(iter: T) -> Self where T: IntoIterator<Item = usize>,157     fn from_iter<T>(iter: T) -> Self
158     where
159         T: IntoIterator<Item = usize>,
160     {
161         CpuSet::new(iter)
162     }
163 }
164 
/// Parses one element of a CPU list and appends the CPUs it names to `cpuset`.
///
/// `s` is either a single CPU index (e.g. `"3"`) or an inclusive range written as
/// `"first-last"` (e.g. `"5-8"`). Only the first `-` separates the two bounds. No whitespace
/// trimming is performed.
///
/// # Errors
///
/// Returns a human-readable message when an index is not a valid `usize`, or when the upper
/// bound of a range is lower than its lower bound. `cpuset` is left untouched on error.
fn parse_cpu_range(s: &str, cpuset: &mut Vec<usize>) -> Result<(), String> {
    fn parse_cpu(s: &str) -> Result<usize, String> {
        s.parse().map_err(|_| {
            format!("invalid CPU index {} - index must be a non-negative integer", s)
        })
    }

    // A lone index is treated as the one-element range `cpu..=cpu`.
    let (first_cpu, last_cpu) = match s.split_once('-') {
        Some((first, last)) => (parse_cpu(first)?, parse_cpu(last)?),
        None => {
            let cpu = parse_cpu(s)?;
            (cpu, cpu)
        }
    };

    if last_cpu < first_cpu {
        return Err(format!("invalid CPU range {} - ranges must be from low to high", s));
    }

    cpuset.extend(first_cpu..=last_cpu);

    Ok(())
}
198 
199 impl FromStr for CpuSet {
200     type Err = String;
201 
from_str(s: &str) -> Result<Self, Self::Err>202     fn from_str(s: &str) -> Result<Self, Self::Err> {
203         let mut cpuset = Vec::new();
204         for part in s.split(',') {
205             parse_cpu_range(part, &mut cpuset)?;
206         }
207         Ok(CpuSet::new(cpuset))
208     }
209 }
210 
211 impl Deref for CpuSet {
212     type Target = Vec<usize>;
213 
deref(&self) -> &Self::Target214     fn deref(&self) -> &Self::Target {
215         &self.0
216     }
217 }
218 
219 impl IntoIterator for CpuSet {
220     type Item = usize;
221     type IntoIter = std::vec::IntoIter<Self::Item>;
222 
into_iter(self) -> Self::IntoIter223     fn into_iter(self) -> Self::IntoIter {
224         self.0.into_iter()
225     }
226 }
227 
228 /// Deserializes a `CpuSet` from a sequence which elements can either be integers, or strings
229 /// representing CPU ranges (e.g. `5-8`).
230 impl<'de> Deserialize<'de> for CpuSet {
deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: serde::Deserializer<'de>,231     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
232     where
233         D: serde::Deserializer<'de>,
234     {
235         struct CpuSetVisitor;
236         impl<'de> Visitor<'de> for CpuSetVisitor {
237             type Value = CpuSet;
238 
239             fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
240                 formatter.write_str("CpuSet")
241             }
242 
243             fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
244             where
245                 A: serde::de::SeqAccess<'de>,
246             {
247                 #[derive(Deserialize)]
248                 #[serde(untagged)]
249                 enum CpuSetValue<'a> {
250                     Single(usize),
251                     Range(&'a str),
252                 }
253 
254                 let mut cpus = Vec::new();
255                 while let Some(cpuset) = seq.next_element::<CpuSetValue>()? {
256                     match cpuset {
257                         CpuSetValue::Single(cpu) => cpus.push(cpu),
258                         CpuSetValue::Range(range) => {
259                             parse_cpu_range(range, &mut cpus).map_err(serde::de::Error::custom)?;
260                         }
261                     }
262                 }
263 
264                 Ok(CpuSet::new(cpus))
265             }
266         }
267 
268         deserializer.deserialize_seq(CpuSetVisitor)
269     }
270 }
271 
272 /// Serializes a `CpuSet` into a sequence of integers and strings representing CPU ranges.
273 impl Serialize for CpuSet {
serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer,274     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
275     where
276         S: serde::Serializer,
277     {
278         use serde::ser::SerializeSeq;
279 
280         let mut seq = serializer.serialize_seq(None)?;
281 
282         // Factorize ranges into "a-b" strings.
283         let mut serialize_range = |start: usize, end: usize| -> Result<(), S::Error> {
284             if start == end {
285                 seq.serialize_element(&start)?;
286             } else {
287                 seq.serialize_element(&format!("{}-{}", start, end))?;
288             }
289 
290             Ok(())
291         };
292 
293         // Current range.
294         let mut range = None;
295         for core in &self.0 {
296             range = match range {
297                 None => Some((core, core)),
298                 Some((start, end)) if *end == *core - 1 => Some((start, core)),
299                 Some((start, end)) => {
300                     serialize_range(*start, *end)?;
301                     Some((core, core))
302                 }
303             };
304         }
305 
306         if let Some((start, end)) = range {
307             serialize_range(*start, *end)?;
308         }
309 
310         seq.end()
311     }
312 }
313 
314 /// Mapping of guest VCPU threads to host CPU cores.
315 #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
316 pub enum VcpuAffinity {
317     /// All VCPU threads will be pinned to the same set of host CPU cores.
318     Global(CpuSet),
319     /// Each VCPU may be pinned to a set of host CPU cores.
320     /// The map key is a guest VCPU index, and the corresponding value is the set of
321     /// host CPU indices that the VCPU thread will be allowed to run on.
322     /// If a VCPU index is not present in the map, its affinity will not be set.
323     PerVcpu(BTreeMap<usize, CpuSet>),
324 }
325 
326 /// Holds the pieces needed to build a VM. Passed to `build_vm` in the `LinuxArch` trait below to
327 /// create a `RunnableLinuxVm`.
328 #[sorted]
329 pub struct VmComponents {
330     #[cfg(all(target_arch = "x86_64", unix))]
331     pub ac_adapter: bool,
332     pub acpi_sdts: Vec<SDT>,
333     pub android_fstab: Option<File>,
334     pub boot_cpu: usize,
335     pub bootorder_fw_cfg_blob: Vec<u8>,
336     #[cfg(target_arch = "x86_64")]
337     pub break_linux_pci_config_io: bool,
338     pub cpu_capacity: BTreeMap<usize, u32>,
339     pub cpu_clusters: Vec<CpuSet>,
340     #[cfg(all(
341         any(target_arch = "arm", target_arch = "aarch64"),
342         any(target_os = "android", target_os = "linux")
343     ))]
344     pub cpu_frequencies: BTreeMap<usize, Vec<u32>>,
345     pub delay_rt: bool,
346     pub dynamic_power_coefficient: BTreeMap<usize, u32>,
347     pub extra_kernel_params: Vec<String>,
348     #[cfg(target_arch = "x86_64")]
349     pub force_s2idle: bool,
350     pub fw_cfg_enable: bool,
351     pub fw_cfg_parameters: Vec<FwCfgParameters>,
352     #[cfg(feature = "gdb")]
353     pub gdb: Option<(u32, Tube)>, // port and control tube.
354     pub host_cpu_topology: bool,
355     pub hugepages: bool,
356     pub hv_cfg: hypervisor::Config,
357     pub initrd_image: Option<File>,
358     pub itmt: bool,
359     pub memory_size: u64,
360     pub no_i8042: bool,
361     pub no_rtc: bool,
362     pub no_smt: bool,
363     #[cfg(target_arch = "x86_64")]
364     pub pci_low_start: Option<u64>,
365     #[cfg(target_arch = "x86_64")]
366     pub pcie_ecam: Option<AddressRange>,
367     pub pflash_block_size: u32,
368     pub pflash_image: Option<File>,
369     pub pstore: Option<Pstore>,
370     /// A file to load as pVM firmware. Must be `Some` iff
371     /// `hv_cfg.protection_type == ProtectionType::UnprotectedWithFirmware`.
372     pub pvm_fw: Option<File>,
373     pub rt_cpus: CpuSet,
374     #[cfg(target_arch = "x86_64")]
375     pub smbios: SmbiosOptions,
376     pub swiotlb: Option<u64>,
377     pub vcpu_affinity: Option<VcpuAffinity>,
378     pub vcpu_count: usize,
379     #[cfg(all(
380         any(target_arch = "arm", target_arch = "aarch64"),
381         any(target_os = "android", target_os = "linux")
382     ))]
383     pub virt_cpufreq_socket: Option<std::os::unix::net::UnixStream>,
384     pub vm_image: VmImage,
385 }
386 
387 /// Holds the elements needed to run a Linux VM. Created by `build_vm`.
388 #[sorted]
389 pub struct RunnableLinuxVm<V: VmArch, Vcpu: VcpuArch> {
390     pub bat_control: Option<BatControl>,
391     pub delay_rt: bool,
392     pub devices_thread: Option<std::thread::JoinHandle<()>>,
393     #[cfg(feature = "gdb")]
394     pub gdb: Option<(u32, Tube)>,
395     pub hotplug_bus: BTreeMap<u8, Arc<Mutex<dyn HotPlugBus>>>,
396     pub io_bus: Arc<Bus>,
397     pub irq_chip: Box<dyn IrqChipArch>,
398     pub mmio_bus: Arc<Bus>,
399     pub no_smt: bool,
400     pub pid_debug_label_map: BTreeMap<u32, String>,
401     #[cfg(any(target_os = "android", target_os = "linux"))]
402     pub platform_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
403     pub pm: Option<Arc<Mutex<dyn PmResource + Send>>>,
404     /// Devices to be notified before the system resumes from the S3 suspended state.
405     pub resume_notify_devices: Vec<Arc<Mutex<dyn BusResumeDevice>>>,
406     pub root_config: Arc<Mutex<PciRoot>>,
407     pub rt_cpus: CpuSet,
408     pub suspend_evt: Event,
409     pub vcpu_affinity: Option<VcpuAffinity>,
410     pub vcpu_count: usize,
411     pub vcpu_init: Vec<VcpuInitArch>,
412     /// If vcpus is None, then it's the responsibility of the vcpu thread to create vcpus.
413     /// If it's Some, then `build_vm` already created the vcpus.
414     pub vcpus: Option<Vec<Vcpu>>,
415     pub vm: V,
416     pub vm_request_tube: Option<Tube>,
417 }
418 
419 /// The device and optional jail.
420 pub struct VirtioDeviceStub {
421     pub dev: Box<dyn VirtioDevice>,
422     pub jail: Option<Minijail>,
423 }
424 
425 /// Trait which is implemented for each Linux Architecture in order to
426 /// set up the memory, cpus, and system devices and to boot the kernel.
427 pub trait LinuxArch {
428     type Error: StdError;
429 
430     /// Returns a Vec of the valid memory addresses as pairs of address and length. These should be
431     /// used to configure the `GuestMemory` structure for the platform.
432     ///
433     /// # Arguments
434     ///
435     /// * `components` - Parts used to determine the memory layout.
guest_memory_layout( components: &VmComponents, hypervisor: &impl hypervisor::Hypervisor, ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>436     fn guest_memory_layout(
437         components: &VmComponents,
438         hypervisor: &impl hypervisor::Hypervisor,
439     ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>;
440 
441     /// Gets the configuration for a new `SystemAllocator` that fits the given `Vm`'s memory layout.
442     ///
443     /// This is the per-architecture template for constructing the `SystemAllocator`. Platform
444     /// agnostic modifications may be made to this configuration, but the final `SystemAllocator`
445     /// will be at least as strict as this configuration.
446     ///
447     /// # Arguments
448     ///
449     /// * `vm` - The virtual machine to be used as a template for the `SystemAllocator`.
get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig450     fn get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig;
451 
452     /// Takes `VmComponents` and generates a `RunnableLinuxVm`.
453     ///
454     /// # Arguments
455     ///
456     /// * `components` - Parts to use to build the VM.
457     /// * `vm_evt_wrtube` - Tube used by sub-devices to request that crosvm exit because guest wants
458     ///   to stop/shut down or requested reset.
459     /// * `system_allocator` - Allocator created by this trait's implementation of
460     ///   `get_system_allocator_config`.
461     /// * `serial_parameters` - Definitions for how the serial devices should be configured.
462     /// * `serial_jail` - Jail used for serial devices created here.
463     /// * `battery` - Defines what battery device will be created.
464     /// * `vm` - A VM implementation to build upon.
465     /// * `ramoops_region` - Region allocated for ramoops.
466     /// * `devices` - The devices to be built into the VM.
467     /// * `irq_chip` - The IRQ chip implemention for the VM.
468     /// * `debugcon_jail` - Jail used for debugcon devices created here.
469     /// * `pflash_jail` - Jail used for pflash device created here.
470     /// * `fw_cfg_jail` - Jail used for fw_cfg device created here.
471     /// * `device_tree_overlays` - Device tree overlay binaries
build_vm<V, Vcpu>( components: VmComponents, vm_evt_wrtube: &SendTube, system_allocator: &mut SystemAllocator, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, battery: (Option<BatteryType>, Option<Minijail>), vm: V, ramoops_region: Option<pstore::RamoopsRegion>, devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>, irq_chip: &mut dyn IrqChipArch, vcpu_ids: &mut Vec<usize>, dump_device_tree_blob: Option<PathBuf>, debugcon_jail: Option<Minijail>, #[cfg(target_arch = "x86_64")] pflash_jail: Option<Minijail>, #[cfg(target_arch = "x86_64")] fw_cfg_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, #[cfg(any(target_os = "android", target_os = "linux"))] guest_suspended_cvar: Option< Arc<(Mutex<bool>, Condvar)>, >, device_tree_overlays: Vec<DtbOverlay>, ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error> where V: VmArch, Vcpu: VcpuArch472     fn build_vm<V, Vcpu>(
473         components: VmComponents,
474         vm_evt_wrtube: &SendTube,
475         system_allocator: &mut SystemAllocator,
476         serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
477         serial_jail: Option<Minijail>,
478         battery: (Option<BatteryType>, Option<Minijail>),
479         vm: V,
480         ramoops_region: Option<pstore::RamoopsRegion>,
481         devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
482         irq_chip: &mut dyn IrqChipArch,
483         vcpu_ids: &mut Vec<usize>,
484         dump_device_tree_blob: Option<PathBuf>,
485         debugcon_jail: Option<Minijail>,
486         #[cfg(target_arch = "x86_64")] pflash_jail: Option<Minijail>,
487         #[cfg(target_arch = "x86_64")] fw_cfg_jail: Option<Minijail>,
488         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
489         #[cfg(any(target_os = "android", target_os = "linux"))] guest_suspended_cvar: Option<
490             Arc<(Mutex<bool>, Condvar)>,
491         >,
492         device_tree_overlays: Vec<DtbOverlay>,
493     ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
494     where
495         V: VmArch,
496         Vcpu: VcpuArch;
497 
498     /// Configures the vcpu and should be called once per vcpu from the vcpu's thread.
499     ///
500     /// # Arguments
501     ///
502     /// * `vm` - The virtual machine object.
503     /// * `hypervisor` - The `Hypervisor` that created the vcpu.
504     /// * `irq_chip` - The `IrqChip` associated with this vm.
505     /// * `vcpu` - The VCPU object to configure.
506     /// * `vcpu_init` - The data required to initialize VCPU registers and other state.
507     /// * `vcpu_id` - The id of the given `vcpu`.
508     /// * `num_cpus` - Number of virtual CPUs the guest will have.
509     /// * `cpu_config` - CPU feature configurations.
configure_vcpu<V: Vm>( vm: &V, hypervisor: &dyn HypervisorArch, irq_chip: &mut dyn IrqChipArch, vcpu: &mut dyn VcpuArch, vcpu_init: VcpuInitArch, vcpu_id: usize, num_cpus: usize, cpu_config: Option<CpuConfigArch>, ) -> Result<(), Self::Error>510     fn configure_vcpu<V: Vm>(
511         vm: &V,
512         hypervisor: &dyn HypervisorArch,
513         irq_chip: &mut dyn IrqChipArch,
514         vcpu: &mut dyn VcpuArch,
515         vcpu_init: VcpuInitArch,
516         vcpu_id: usize,
517         num_cpus: usize,
518         cpu_config: Option<CpuConfigArch>,
519     ) -> Result<(), Self::Error>;
520 
521     /// Configures and add a pci device into vm
register_pci_device<V: VmArch, Vcpu: VcpuArch>( linux: &mut RunnableLinuxVm<V, Vcpu>, device: Box<dyn PciDevice>, #[cfg(any(target_os = "android", target_os = "linux"))] minijail: Option<Minijail>, resources: &mut SystemAllocator, hp_control_tube: &mpsc::Sender<PciRootCommand>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result<PciAddress, Self::Error>522     fn register_pci_device<V: VmArch, Vcpu: VcpuArch>(
523         linux: &mut RunnableLinuxVm<V, Vcpu>,
524         device: Box<dyn PciDevice>,
525         #[cfg(any(target_os = "android", target_os = "linux"))] minijail: Option<Minijail>,
526         resources: &mut SystemAllocator,
527         hp_control_tube: &mpsc::Sender<PciRootCommand>,
528         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
529     ) -> Result<PciAddress, Self::Error>;
530 
531     /// Returns frequency map for each of the host's logical cores.
get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>, Self::Error>532     fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>, Self::Error>;
533 
534     /// Returns capacity map of the host's logical cores.
get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>, Self::Error>535     fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>, Self::Error>;
536 
537     /// Returns cluster masks for each of the host's logical cores.
get_host_cpu_clusters() -> Result<Vec<CpuSet>, Self::Error>538     fn get_host_cpu_clusters() -> Result<Vec<CpuSet>, Self::Error>;
539 }
540 
/// Per-architecture operations used to service GDB requests against a vCPU of type `T`.
#[cfg(feature = "gdb")]
pub trait GdbOps<T: VcpuArch> {
    /// Error type returned by every operation of this trait.
    type Error: StdError;

    /// Reads vCPU's registers.
    fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers, Self::Error>;

    /// Writes vCPU's registers.
    fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<(), Self::Error>;

    /// Reads `len` bytes from the guest memory, starting at `vaddr`.
    fn read_memory(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        len: usize,
    ) -> Result<Vec<u8>, Self::Error>;

    /// Writes the bytes of `buf` to the guest memory, starting at `vaddr`.
    fn write_memory(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        buf: &[u8],
    ) -> Result<(), Self::Error>;

    /// Reads bytes from the guest register.
    fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>, Self::Error>;

    /// Writes bytes to the specified guest register.
    fn write_register(
        vcpu: &T,
        reg_id: <GdbArch as Arch>::RegId,
        data: &[u8],
    ) -> Result<(), Self::Error>;

    /// Make the next vCPU's run single-step.
    fn enable_singlestep(vcpu: &T) -> Result<(), Self::Error>;

    /// Get maximum number of hardware breakpoints.
    fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize, Self::Error>;

    /// Set hardware breakpoints at the given addresses.
    fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<(), Self::Error>;
}
586 
587 /// Errors for device manager.
588 #[sorted]
589 #[derive(Error, Debug)]
590 pub enum DeviceRegistrationError {
591     /// No more MMIO space available.
592     #[error("no more addresses are available")]
593     AddrsExhausted,
594     /// Could not allocate device address space for the device.
595     #[error("Allocating device addresses: {0}")]
596     AllocateDeviceAddrs(PciDeviceError),
597     /// Could not allocate IO space for the device.
598     #[error("Allocating IO addresses: {0}")]
599     AllocateIoAddrs(PciDeviceError),
600     /// Could not allocate MMIO or IO resource for the device.
601     #[error("Allocating IO resource: {0}")]
602     AllocateIoResource(resources::Error),
603     /// Could not allocate an IRQ number.
604     #[error("Allocating IRQ number")]
605     AllocateIrq,
606     /// Could not allocate IRQ resource for the device.
607     #[cfg(any(target_os = "android", target_os = "linux"))]
608     #[error("Allocating IRQ resource: {0}")]
609     AllocateIrqResource(devices::vfio::VfioError),
610     /// Broken pci topology
611     #[error("pci topology is broken")]
612     BrokenPciTopology,
613     /// Unable to clone a jail for the device.
614     #[cfg(any(target_os = "android", target_os = "linux"))]
615     #[error("failed to clone jail: {0}")]
616     CloneJail(minijail::Error),
617     /// Appending to kernel command line failed.
618     #[error("unable to add device to kernel command line: {0}")]
619     Cmdline(kernel_cmdline::Error),
620     /// Configure window size failed.
621     #[error("failed to configure window size: {0}")]
622     ConfigureWindowSize(PciDeviceError),
623     // Unable to create a pipe.
624     #[error("failed to create pipe: {0}")]
625     CreatePipe(base::Error),
626     // Unable to create a root.
627     #[error("failed to create pci root: {0}")]
628     CreateRoot(anyhow::Error),
629     // Unable to create serial device from serial parameters
630     #[error("failed to create serial device: {0}")]
631     CreateSerialDevice(devices::SerialError),
632     // Unable to create tube
633     #[error("failed to create tube: {0}")]
634     CreateTube(base::TubeError),
635     /// Could not clone an event.
636     #[error("failed to clone event: {0}")]
637     EventClone(base::Error),
638     /// Could not create an event.
639     #[error("failed to create event: {0}")]
640     EventCreate(base::Error),
641     /// Failed to generate ACPI content.
642     #[error("failed to generate ACPI content")]
643     GenerateAcpi,
644     /// No more IRQs are available.
645     #[error("no more IRQs are available")]
646     IrqsExhausted,
647     /// VFIO device is missing a DT symbol.
648     #[error("cannot match VFIO device to DT node due to a missing symbol")]
649     MissingDeviceTreeSymbol,
650     /// Missing a required serial device.
651     #[error("missing required serial device {0}")]
652     MissingRequiredSerialDevice(u8),
653     /// Could not add a device to the mmio bus.
654     #[error("failed to add to mmio bus: {0}")]
655     MmioInsert(BusError),
656     /// Failed to insert device into PCI root.
657     #[error("failed to insert device into PCI root: {0}")]
658     PciRootAddDevice(PciDeviceError),
659     #[cfg(any(target_os = "android", target_os = "linux"))]
660     /// Failed to initialize proxy device for jailed device.
661     #[error("failed to create proxy device: {0}")]
662     ProxyDeviceCreation(devices::ProxyError),
663     #[cfg(any(target_os = "android", target_os = "linux"))]
664     /// Failed to register battery device.
665     #[error("failed to register battery device to VM: {0}")]
666     RegisterBattery(devices::BatteryError),
667     /// Could not register PCI device to pci root bus
668     #[error("failed to register PCI device to pci root bus")]
669     RegisterDevice(SendError<PciRootCommand>),
670     /// Could not register PCI device capabilities.
671     #[error("could not register PCI device capabilities: {0}")]
672     RegisterDeviceCapabilities(PciDeviceError),
673     /// Failed to register ioevent with VM.
674     #[error("failed to register ioevent to VM: {0}")]
675     RegisterIoevent(base::Error),
676     /// Failed to register irq event with VM.
677     #[error("failed to register irq event to VM: {0}")]
678     RegisterIrqfd(base::Error),
679     /// Could not setup VFIO platform IRQ for the device.
680     #[error("Setting up VFIO platform IRQ: {0}")]
681     SetupVfioPlatformIrq(anyhow::Error),
682 }
683 
684 /// Config a PCI device for used by this vm.
configure_pci_device<V: VmArch, Vcpu: VcpuArch>( linux: &mut RunnableLinuxVm<V, Vcpu>, mut device: Box<dyn PciDevice>, #[cfg(any(target_os = "android", target_os = "linux"))] jail: Option<Minijail>, resources: &mut SystemAllocator, hp_control_tube: &mpsc::Sender<PciRootCommand>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result<PciAddress, DeviceRegistrationError>685 pub fn configure_pci_device<V: VmArch, Vcpu: VcpuArch>(
686     linux: &mut RunnableLinuxVm<V, Vcpu>,
687     mut device: Box<dyn PciDevice>,
688     #[cfg(any(target_os = "android", target_os = "linux"))] jail: Option<Minijail>,
689     resources: &mut SystemAllocator,
690     hp_control_tube: &mpsc::Sender<PciRootCommand>,
691     #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
692 ) -> Result<PciAddress, DeviceRegistrationError> {
693     // Allocate PCI device address before allocating BARs.
694     let pci_address = device
695         .allocate_address(resources)
696         .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
697 
698     // Allocate ranges that may need to be in the low MMIO region (MmioType::Low).
699     let mmio_ranges = device
700         .allocate_io_bars(resources)
701         .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
702 
703     // Allocate device ranges that may be in low or high MMIO after low-only ranges.
704     let device_ranges = device
705         .allocate_device_bars(resources)
706         .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
707 
708     // If device is a pcie bridge, add its pci bus to pci root
709     if let Some(pci_bus) = device.get_new_pci_bus() {
710         hp_control_tube
711             .send(PciRootCommand::AddBridge(pci_bus))
712             .map_err(DeviceRegistrationError::RegisterDevice)?;
713         let bar_ranges = Vec::new();
714         device
715             .configure_bridge_window(resources, &bar_ranges)
716             .map_err(DeviceRegistrationError::ConfigureWindowSize)?;
717     }
718 
719     // Do not suggest INTx for hot-plug devices.
720     let intx_event = devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
721 
722     if let PreferredIrq::Fixed { pin, gsi } = device.preferred_irq() {
723         resources.reserve_irq(gsi);
724 
725         device.assign_irq(
726             intx_event
727                 .try_clone()
728                 .map_err(DeviceRegistrationError::EventClone)?,
729             pin,
730             gsi,
731         );
732 
733         linux
734             .irq_chip
735             .as_irq_chip_mut()
736             .register_level_irq_event(gsi, &intx_event, IrqEventSource::from_device(&device))
737             .map_err(DeviceRegistrationError::RegisterIrqfd)?;
738     }
739 
740     let mut keep_rds = device.keep_rds();
741     syslog::push_descriptors(&mut keep_rds);
742     cros_tracing::push_descriptors!(&mut keep_rds);
743     metrics::push_descriptors(&mut keep_rds);
744 
745     device
746         .register_device_capabilities()
747         .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
748 
749     #[cfg(any(target_os = "android", target_os = "linux"))]
750     let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
751         let proxy = ProxyDevice::new(
752             device,
753             jail,
754             keep_rds,
755             #[cfg(feature = "swap")]
756             swap_controller,
757         )
758         .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
759         linux
760             .pid_debug_label_map
761             .insert(proxy.pid() as u32, proxy.debug_label());
762         Arc::new(Mutex::new(proxy))
763     } else {
764         device.on_sandboxed();
765         Arc::new(Mutex::new(device))
766     };
767 
768     #[cfg(windows)]
769     let arced_dev = {
770         device.on_sandboxed();
771         Arc::new(Mutex::new(device))
772     };
773 
774     #[cfg(any(target_os = "android", target_os = "linux"))]
775     hp_control_tube
776         .send(PciRootCommand::Add(pci_address, arced_dev.clone()))
777         .map_err(DeviceRegistrationError::RegisterDevice)?;
778 
779     for range in &mmio_ranges {
780         linux
781             .mmio_bus
782             .insert(arced_dev.clone(), range.addr, range.size)
783             .map_err(DeviceRegistrationError::MmioInsert)?;
784     }
785 
786     for range in &device_ranges {
787         linux
788             .mmio_bus
789             .insert(arced_dev.clone(), range.addr, range.size)
790             .map_err(DeviceRegistrationError::MmioInsert)?;
791     }
792 
793     Ok(pci_address)
794 }
795 
796 /// Creates Virtio MMIO devices for use by this Vm.
generate_virtio_mmio_bus( devices: Vec<(VirtioMmioDevice, Option<Minijail>)>, irq_chip: &mut dyn IrqChip, mmio_bus: &Bus, resources: &mut SystemAllocator, vm: &mut impl Vm, sdts: Vec<SDT>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result<(BTreeMap<u32, String>, Vec<SDT>), DeviceRegistrationError>797 pub fn generate_virtio_mmio_bus(
798     devices: Vec<(VirtioMmioDevice, Option<Minijail>)>,
799     irq_chip: &mut dyn IrqChip,
800     mmio_bus: &Bus,
801     resources: &mut SystemAllocator,
802     vm: &mut impl Vm,
803     sdts: Vec<SDT>,
804     #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
805 ) -> Result<(BTreeMap<u32, String>, Vec<SDT>), DeviceRegistrationError> {
806     #[cfg_attr(windows, allow(unused_mut))]
807     let mut pid_labels = BTreeMap::new();
808 
809     // sdts can be updated only on x86 platforms.
810     #[cfg(target_arch = "x86_64")]
811     let mut sdts = sdts;
812     for dev_value in devices.into_iter() {
813         #[cfg(any(target_os = "android", target_os = "linux"))]
814         let (mut device, jail) = dev_value;
815         #[cfg(windows)]
816         let (mut device, _) = dev_value;
817 
818         let ranges = device
819             .allocate_regions(resources)
820             .map_err(DeviceRegistrationError::AllocateIoResource)?;
821 
822         let mut keep_rds = device.keep_rds();
823         syslog::push_descriptors(&mut keep_rds);
824         cros_tracing::push_descriptors!(&mut keep_rds);
825         metrics::push_descriptors(&mut keep_rds);
826 
827         let irq_num = resources
828             .allocate_irq()
829             .ok_or(DeviceRegistrationError::AllocateIrq)?;
830         let irq_evt = devices::IrqEdgeEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
831         irq_chip
832             .register_edge_irq_event(irq_num, &irq_evt, IrqEventSource::from_device(&device))
833             .map_err(DeviceRegistrationError::RegisterIrqfd)?;
834         device.assign_irq(&irq_evt, irq_num);
835         keep_rds.extend(irq_evt.as_raw_descriptors());
836 
837         for (event, addr, datamatch) in device.ioevents() {
838             let io_addr = IoEventAddress::Mmio(addr);
839             vm.register_ioevent(event, io_addr, datamatch)
840                 .map_err(DeviceRegistrationError::RegisterIoevent)?;
841             keep_rds.push(event.as_raw_descriptor());
842         }
843 
844         #[cfg(target_arch = "x86_64")]
845         {
846             sdts = device
847                 .generate_acpi(sdts)
848                 .ok_or(DeviceRegistrationError::GenerateAcpi)?;
849         }
850 
851         #[cfg(any(target_os = "android", target_os = "linux"))]
852         let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
853             let proxy = ProxyDevice::new(
854                 device,
855                 jail,
856                 keep_rds,
857                 #[cfg(feature = "swap")]
858                 swap_controller,
859             )
860             .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
861             pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
862             Arc::new(Mutex::new(proxy))
863         } else {
864             device.on_sandboxed();
865             Arc::new(Mutex::new(device))
866         };
867 
868         #[cfg(windows)]
869         let arced_dev = {
870             device.on_sandboxed();
871             Arc::new(Mutex::new(device))
872         };
873 
874         for range in &ranges {
875             mmio_bus
876                 .insert(arced_dev.clone(), range.0, range.1)
877                 .map_err(DeviceRegistrationError::MmioInsert)?;
878         }
879     }
880     Ok((pid_labels, sdts))
881 }
882 
883 // Generate pci topology starting from parent bus
generate_pci_topology( parent_bus: Arc<Mutex<PciBus>>, resources: &mut SystemAllocator, io_ranges: &mut BTreeMap<usize, Vec<BarRange>>, device_ranges: &mut BTreeMap<usize, Vec<BarRange>>, device_addrs: &[PciAddress], devices: &mut Vec<(Box<dyn PciDevice>, Option<Minijail>)>, ) -> Result<(Vec<BarRange>, u8), DeviceRegistrationError>884 pub fn generate_pci_topology(
885     parent_bus: Arc<Mutex<PciBus>>,
886     resources: &mut SystemAllocator,
887     io_ranges: &mut BTreeMap<usize, Vec<BarRange>>,
888     device_ranges: &mut BTreeMap<usize, Vec<BarRange>>,
889     device_addrs: &[PciAddress],
890     devices: &mut Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
891 ) -> Result<(Vec<BarRange>, u8), DeviceRegistrationError> {
892     let mut bar_ranges = Vec::new();
893     let bus_num = parent_bus.lock().get_bus_num();
894     let mut subordinate_bus = bus_num;
895     for (dev_idx, addr) in device_addrs.iter().enumerate() {
896         // Only target for devices that located on this bus
897         if addr.bus == bus_num {
898             // If this device is a pci bridge (a.k.a., it has a pci bus structure),
899             // create its topology recursively
900             if let Some(child_bus) = devices[dev_idx].0.get_new_pci_bus() {
901                 let (child_bar_ranges, child_sub_bus) = generate_pci_topology(
902                     child_bus.clone(),
903                     resources,
904                     io_ranges,
905                     device_ranges,
906                     device_addrs,
907                     devices,
908                 )?;
909                 let device = &mut devices[dev_idx].0;
910                 parent_bus
911                     .lock()
912                     .add_child_bus(child_bus.clone())
913                     .map_err(|_| DeviceRegistrationError::BrokenPciTopology)?;
914                 let bridge_window = device
915                     .configure_bridge_window(resources, &child_bar_ranges)
916                     .map_err(DeviceRegistrationError::ConfigureWindowSize)?;
917                 bar_ranges.extend(bridge_window);
918 
919                 let ranges = device
920                     .allocate_io_bars(resources)
921                     .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
922                 io_ranges.insert(dev_idx, ranges.clone());
923                 bar_ranges.extend(ranges);
924 
925                 let ranges = device
926                     .allocate_device_bars(resources)
927                     .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
928                 device_ranges.insert(dev_idx, ranges.clone());
929                 bar_ranges.extend(ranges);
930 
931                 device.set_subordinate_bus(child_sub_bus);
932 
933                 subordinate_bus = std::cmp::max(subordinate_bus, child_sub_bus);
934             }
935         }
936     }
937 
938     for (dev_idx, addr) in device_addrs.iter().enumerate() {
939         if addr.bus == bus_num {
940             let device = &mut devices[dev_idx].0;
941             // Allocate MMIO for non-bridge devices
942             if device.get_new_pci_bus().is_none() {
943                 let ranges = device
944                     .allocate_io_bars(resources)
945                     .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
946                 io_ranges.insert(dev_idx, ranges.clone());
947                 bar_ranges.extend(ranges);
948 
949                 let ranges = device
950                     .allocate_device_bars(resources)
951                     .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
952                 device_ranges.insert(dev_idx, ranges.clone());
953                 bar_ranges.extend(ranges);
954             }
955         }
956     }
957     Ok((bar_ranges, subordinate_bus))
958 }
959 
960 /// Ensure all PCI devices have an assigned PCI address.
assign_pci_addresses( devices: &mut [(Box<dyn BusDeviceObj>, Option<Minijail>)], resources: &mut SystemAllocator, ) -> Result<(), DeviceRegistrationError>961 pub fn assign_pci_addresses(
962     devices: &mut [(Box<dyn BusDeviceObj>, Option<Minijail>)],
963     resources: &mut SystemAllocator,
964 ) -> Result<(), DeviceRegistrationError> {
965     // First allocate devices with a preferred address.
966     for pci_device in devices
967         .iter_mut()
968         .filter_map(|(device, _jail)| device.as_pci_device_mut())
969         .filter(|pci_device| pci_device.preferred_address().is_some())
970     {
971         let _ = pci_device
972             .allocate_address(resources)
973             .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
974     }
975 
976     // Then allocate addresses for the remaining devices.
977     for pci_device in devices
978         .iter_mut()
979         .filter_map(|(device, _jail)| device.as_pci_device_mut())
980         .filter(|pci_device| pci_device.preferred_address().is_none())
981     {
982         let _ = pci_device
983             .allocate_address(resources)
984             .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
985     }
986 
987     Ok(())
988 }
989 
990 /// Creates a root PCI device for use by this Vm.
generate_pci_root( mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>, irq_chip: &mut dyn IrqChip, mmio_bus: Arc<Bus>, mmio_base: GuestAddress, mmio_register_bit_num: usize, io_bus: Arc<Bus>, resources: &mut SystemAllocator, vm: &mut impl Vm, max_irqs: usize, vcfg_base: Option<u64>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, ) -> Result< ( PciRoot, Vec<(PciAddress, u32, PciInterruptPin)>, BTreeMap<u32, String>, BTreeMap<PciAddress, Vec<u8>>, BTreeMap<PciAddress, Vec<u8>>, ), DeviceRegistrationError, >991 pub fn generate_pci_root(
992     mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
993     irq_chip: &mut dyn IrqChip,
994     mmio_bus: Arc<Bus>,
995     mmio_base: GuestAddress,
996     mmio_register_bit_num: usize,
997     io_bus: Arc<Bus>,
998     resources: &mut SystemAllocator,
999     vm: &mut impl Vm,
1000     max_irqs: usize,
1001     vcfg_base: Option<u64>,
1002     #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
1003 ) -> Result<
1004     (
1005         PciRoot,
1006         Vec<(PciAddress, u32, PciInterruptPin)>,
1007         BTreeMap<u32, String>,
1008         BTreeMap<PciAddress, Vec<u8>>,
1009         BTreeMap<PciAddress, Vec<u8>>,
1010     ),
1011     DeviceRegistrationError,
1012 > {
1013     let mut device_addrs = Vec::new();
1014 
1015     for (device, _jail) in devices.iter_mut() {
1016         let address = device
1017             .allocate_address(resources)
1018             .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
1019         device_addrs.push(address);
1020     }
1021 
1022     let mut device_ranges = BTreeMap::new();
1023     let mut io_ranges = BTreeMap::new();
1024     let root_bus = Arc::new(Mutex::new(PciBus::new(0, 0, false)));
1025 
1026     generate_pci_topology(
1027         root_bus.clone(),
1028         resources,
1029         &mut io_ranges,
1030         &mut device_ranges,
1031         &device_addrs,
1032         &mut devices,
1033     )?;
1034 
1035     let mut root = PciRoot::new(
1036         vm,
1037         Arc::downgrade(&mmio_bus),
1038         mmio_base,
1039         mmio_register_bit_num,
1040         Arc::downgrade(&io_bus),
1041         root_bus,
1042     )
1043     .map_err(DeviceRegistrationError::CreateRoot)?;
1044     #[cfg_attr(windows, allow(unused_mut))]
1045     let mut pid_labels = BTreeMap::new();
1046 
1047     // Allocate legacy INTx
1048     let mut pci_irqs = Vec::new();
1049     let mut irqs: Vec<u32> = Vec::new();
1050 
1051     // Mapping of (bus, dev, pin) -> IRQ number.
1052     let mut dev_pin_irq = BTreeMap::new();
1053 
1054     for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
1055         let pci_address = device_addrs[dev_idx];
1056 
1057         let irq = match device.preferred_irq() {
1058             PreferredIrq::Fixed { pin, gsi } => {
1059                 // The device reported a preferred IRQ, so use that rather than allocating one.
1060                 resources.reserve_irq(gsi);
1061                 Some((pin, gsi))
1062             }
1063             PreferredIrq::Any => {
1064                 // The device did not provide a preferred IRQ but requested one, so allocate one.
1065 
1066                 // Choose a pin based on the slot's function number. Function 0 must always use
1067                 // INTA# for single-function devices per the PCI spec, and we choose to use INTA#
1068                 // for function 0 on multifunction devices and distribute the remaining functions
1069                 // evenly across the other pins.
1070                 let pin = match pci_address.func % 4 {
1071                     0 => PciInterruptPin::IntA,
1072                     1 => PciInterruptPin::IntB,
1073                     2 => PciInterruptPin::IntC,
1074                     _ => PciInterruptPin::IntD,
1075                 };
1076 
1077                 // If an IRQ number has already been assigned for a different function with this
1078                 // (bus, device, pin) combination, use it. Otherwise allocate a new one and insert
1079                 // it into the map.
1080                 let pin_key = (pci_address.bus, pci_address.dev, pin);
1081                 let irq_num = if let Some(irq_num) = dev_pin_irq.get(&pin_key) {
1082                     *irq_num
1083                 } else {
1084                     // If we have allocated fewer than `max_irqs` total, add a new irq to the `irqs`
1085                     // pool. Otherwise, share one of the existing `irqs`.
1086                     let irq_num = if irqs.len() < max_irqs {
1087                         let irq_num = resources
1088                             .allocate_irq()
1089                             .ok_or(DeviceRegistrationError::AllocateIrq)?;
1090                         irqs.push(irq_num);
1091                         irq_num
1092                     } else {
1093                         // Pick one of the existing IRQs to share, using `dev_idx` to distribute IRQ
1094                         // sharing evenly across devices.
1095                         irqs[dev_idx % max_irqs]
1096                     };
1097 
1098                     dev_pin_irq.insert(pin_key, irq_num);
1099                     irq_num
1100                 };
1101                 Some((pin, irq_num))
1102             }
1103             PreferredIrq::None => {
1104                 // The device does not want an INTx# IRQ.
1105                 None
1106             }
1107         };
1108 
1109         if let Some((pin, gsi)) = irq {
1110             let intx_event =
1111                 devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
1112 
1113             device.assign_irq(
1114                 intx_event
1115                     .try_clone()
1116                     .map_err(DeviceRegistrationError::EventClone)?,
1117                 pin,
1118                 gsi,
1119             );
1120 
1121             irq_chip
1122                 .register_level_irq_event(gsi, &intx_event, IrqEventSource::from_device(device))
1123                 .map_err(DeviceRegistrationError::RegisterIrqfd)?;
1124 
1125             pci_irqs.push((pci_address, gsi, pin));
1126         }
1127     }
1128 
1129     // To prevent issues where device's on_sandbox may spawn thread before all
1130     // sandboxed devices are sandboxed we partition iterator to go over sandboxed
1131     // first. This is needed on linux platforms. On windows, this is a no-op since
1132     // jails are always None, even for sandboxed devices.
1133     let devices = {
1134         let (sandboxed, non_sandboxed): (Vec<_>, Vec<_>) = devices
1135             .into_iter()
1136             .enumerate()
1137             .partition(|(_, (_, jail))| jail.is_some());
1138         sandboxed.into_iter().chain(non_sandboxed)
1139     };
1140 
1141     let mut amls = BTreeMap::new();
1142     let mut gpe_scope_amls = BTreeMap::new();
1143     for (dev_idx, dev_value) in devices {
1144         #[cfg(any(target_os = "android", target_os = "linux"))]
1145         let (mut device, jail) = dev_value;
1146         #[cfg(windows)]
1147         let (mut device, _) = dev_value;
1148         let address = device_addrs[dev_idx];
1149 
1150         let mut keep_rds = device.keep_rds();
1151         syslog::push_descriptors(&mut keep_rds);
1152         cros_tracing::push_descriptors!(&mut keep_rds);
1153         metrics::push_descriptors(&mut keep_rds);
1154         keep_rds.append(&mut vm.get_memory().as_raw_descriptors());
1155 
1156         let ranges = io_ranges.remove(&dev_idx).unwrap_or_default();
1157         let device_ranges = device_ranges.remove(&dev_idx).unwrap_or_default();
1158         device
1159             .register_device_capabilities()
1160             .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
1161 
1162         if let Some(vcfg_base) = vcfg_base {
1163             let (methods, shm) = device.generate_acpi_methods();
1164             if !methods.is_empty() {
1165                 amls.insert(address, methods);
1166             }
1167             if let Some((offset, mmap)) = shm {
1168                 let _ = vm.add_memory_region(
1169                     GuestAddress(vcfg_base + offset as u64),
1170                     Box::new(mmap),
1171                     false,
1172                     false,
1173                     MemCacheType::CacheCoherent,
1174                 );
1175             }
1176         }
1177         let gpe_nr = device.set_gpe(resources);
1178 
1179         #[cfg(any(target_os = "android", target_os = "linux"))]
1180         let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
1181             let proxy = ProxyDevice::new(
1182                 device,
1183                 jail,
1184                 keep_rds,
1185                 #[cfg(feature = "swap")]
1186                 swap_controller,
1187             )
1188             .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
1189             pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
1190             Arc::new(Mutex::new(proxy))
1191         } else {
1192             device.on_sandboxed();
1193             Arc::new(Mutex::new(device))
1194         };
1195         #[cfg(windows)]
1196         let arced_dev = {
1197             device.on_sandboxed();
1198             Arc::new(Mutex::new(device))
1199         };
1200         root.add_device(address, arced_dev.clone(), vm)
1201             .map_err(DeviceRegistrationError::PciRootAddDevice)?;
1202         for range in &ranges {
1203             mmio_bus
1204                 .insert(arced_dev.clone(), range.addr, range.size)
1205                 .map_err(DeviceRegistrationError::MmioInsert)?;
1206         }
1207 
1208         for range in &device_ranges {
1209             mmio_bus
1210                 .insert(arced_dev.clone(), range.addr, range.size)
1211                 .map_err(DeviceRegistrationError::MmioInsert)?;
1212         }
1213 
1214         if let Some(gpe_nr) = gpe_nr {
1215             if let Some(acpi_path) = root.acpi_path(&address) {
1216                 let mut gpe_aml = Vec::new();
1217 
1218                 GpeScope {}.cast_to_aml_bytes(
1219                     &mut gpe_aml,
1220                     gpe_nr,
1221                     format!("\\{}", acpi_path).as_str(),
1222                 );
1223                 if !gpe_aml.is_empty() {
1224                     gpe_scope_amls.insert(address, gpe_aml);
1225                 }
1226             }
1227         }
1228     }
1229 
1230     Ok((root, pci_irqs, pid_labels, amls, gpe_scope_amls))
1231 }
1232 
/// Errors for image loading.
#[sorted]
#[derive(Error, Debug)]
pub enum LoadImageError {
    /// The requested alignment (carried in the variant) is not a power of two.
    #[error("Alignment not a power of two: {0}")]
    BadAlignment(u64),
    /// The length of the image could not be determined.
    #[error("Getting image size failed: {0}")]
    GetLen(io::Error),
    /// The destination range could not be obtained from guest memory.
    #[error("GuestMemory get slice failed: {0}")]
    GuestMemorySlice(GuestMemoryError),
    /// The image (whose size is carried in the variant) does not fit in the space available.
    #[error("Image size too large: {0}")]
    ImageSizeTooLarge(u64),
    /// Copying the image contents into guest memory failed.
    #[error("Reading image into memory failed: {0}")]
    ReadToMemory(io::Error),
}
1248 
1249 /// Load an image from a file into guest memory.
1250 ///
1251 /// # Arguments
1252 ///
1253 /// * `guest_mem` - The memory to be used by the guest.
1254 /// * `guest_addr` - The starting address to load the image in the guest memory.
1255 /// * `max_size` - The amount of space in bytes available in the guest memory for the image.
1256 /// * `image` - The file containing the image to be loaded.
1257 ///
1258 /// The size in bytes of the loaded image is returned.
load_image<F>( guest_mem: &GuestMemory, image: &mut F, guest_addr: GuestAddress, max_size: u64, ) -> Result<usize, LoadImageError> where F: FileReadWriteAtVolatile + FileGetLen,1259 pub fn load_image<F>(
1260     guest_mem: &GuestMemory,
1261     image: &mut F,
1262     guest_addr: GuestAddress,
1263     max_size: u64,
1264 ) -> Result<usize, LoadImageError>
1265 where
1266     F: FileReadWriteAtVolatile + FileGetLen,
1267 {
1268     let size = image.get_len().map_err(LoadImageError::GetLen)?;
1269 
1270     if size > usize::max_value() as u64 || size > max_size {
1271         return Err(LoadImageError::ImageSizeTooLarge(size));
1272     }
1273 
1274     // This is safe due to the bounds check above.
1275     let size = size as usize;
1276 
1277     let guest_slice = guest_mem
1278         .get_slice_at_addr(guest_addr, size)
1279         .map_err(LoadImageError::GuestMemorySlice)?;
1280     image
1281         .read_exact_at_volatile(guest_slice, 0)
1282         .map_err(LoadImageError::ReadToMemory)?;
1283 
1284     Ok(size)
1285 }
1286 
1287 /// Load an image from a file into guest memory at the highest possible address.
1288 ///
1289 /// # Arguments
1290 ///
1291 /// * `guest_mem` - The memory to be used by the guest.
1292 /// * `image` - The file containing the image to be loaded.
1293 /// * `min_guest_addr` - The minimum address of the start of the image.
1294 /// * `max_guest_addr` - The address to load the last byte of the image.
1295 /// * `align` - The minimum alignment of the start address of the image in bytes (must be a power of
1296 ///   two).
1297 ///
1298 /// The guest address and size in bytes of the loaded image are returned.
load_image_high<F>( guest_mem: &GuestMemory, image: &mut F, min_guest_addr: GuestAddress, max_guest_addr: GuestAddress, align: u64, ) -> Result<(GuestAddress, usize), LoadImageError> where F: FileReadWriteAtVolatile + FileGetLen,1299 pub fn load_image_high<F>(
1300     guest_mem: &GuestMemory,
1301     image: &mut F,
1302     min_guest_addr: GuestAddress,
1303     max_guest_addr: GuestAddress,
1304     align: u64,
1305 ) -> Result<(GuestAddress, usize), LoadImageError>
1306 where
1307     F: FileReadWriteAtVolatile + FileGetLen,
1308 {
1309     if !align.is_power_of_two() {
1310         return Err(LoadImageError::BadAlignment(align));
1311     }
1312 
1313     let max_size = max_guest_addr.offset_from(min_guest_addr) & !(align - 1);
1314     let size = image.get_len().map_err(LoadImageError::GetLen)?;
1315 
1316     if size > usize::max_value() as u64 || size > max_size {
1317         return Err(LoadImageError::ImageSizeTooLarge(size));
1318     }
1319 
1320     // Load image at the maximum aligned address allowed.
1321     // The subtraction cannot underflow because of the size checks above.
1322     let guest_addr = GuestAddress((max_guest_addr.offset() - size) & !(align - 1));
1323 
1324     // This is safe due to the bounds check above.
1325     let size = size as usize;
1326 
1327     let guest_slice = guest_mem
1328         .get_slice_at_addr(guest_addr, size)
1329         .map_err(LoadImageError::GuestMemorySlice)?;
1330     image
1331         .read_exact_at_volatile(guest_slice, 0)
1332         .map_err(LoadImageError::ReadToMemory)?;
1333 
1334     Ok((guest_addr, size))
1335 }
1336 
/// SMBIOS table configuration.
///
/// All string and UUID fields are optional. In serialized form the field names are kebab-case
/// and unknown fields are rejected; `oem_strings` falls back to its default (an empty list) when
/// it is omitted.
#[derive(Clone, Debug, Default, Serialize, Deserialize, FromKeyValues, PartialEq, Eq)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct SmbiosOptions {
    /// BIOS vendor name.
    pub bios_vendor: Option<String>,

    /// BIOS version number (free-form string).
    pub bios_version: Option<String>,

    /// System manufacturer name.
    pub manufacturer: Option<String>,

    /// System product name.
    pub product_name: Option<String>,

    /// System serial number (free-form string).
    pub serial_number: Option<String>,

    /// System UUID.
    pub uuid: Option<Uuid>,

    /// Additional OEM strings to add to SMBIOS table.
    #[serde(default)]
    pub oem_strings: Vec<String>,
}
1363 
#[cfg(test)]
mod tests {
    use serde_keyvalue::from_key_values;

    use super::*;

    /// A pstore specification needs both `path` and `size`; anything less is rejected.
    #[test]
    fn parse_pstore() {
        let parsed: Pstore = from_key_values("path=/some/path,size=16384").unwrap();
        let expected = Pstore {
            path: "/some/path".into(),
            size: 16384,
        };
        assert_eq!(parsed, expected);

        // Missing size, missing path, and missing both.
        for incomplete in ["path=/some/path", "size=16384", ""] {
            assert!(from_key_values::<Pstore>(incomplete).is_err());
        }
    }

    /// Key-value syntax accepts single CPUs, ranges, and a mix of both.
    #[test]
    fn deserialize_cpuset_serde_kv() {
        let cases = [
            ("[0,4,7]", vec![0, 4, 7]),
            ("[9-12]", vec![9, 10, 11, 12]),
            ("[0,4,7,9-12,15]", vec![0, 4, 7, 9, 10, 11, 12, 15]),
        ];

        for (input, cpus) in cases {
            let parsed: CpuSet = from_key_values(input).unwrap();
            assert_eq!(parsed, CpuSet::new(cpus));
        }
    }

    /// JSON round-trips: parsing yields the expected set and serializing gives the input back.
    #[test]
    fn deserialize_serialize_cpuset_json() {
        let cases = [
            ("[0,4,7]", vec![0, 4, 7]),
            (r#"["9-12"]"#, vec![9, 10, 11, 12]),
            (r#"[0,4,7,"9-12",15]"#, vec![0, 4, 7, 9, 10, 11, 12, 15]),
        ];

        for (json_str, cpus) in cases {
            let cpuset = CpuSet::new(cpus);
            let parsed: CpuSet = serde_json::from_str(json_str).unwrap();
            assert_eq!(parsed, cpuset);
            assert_eq!(serde_json::to_string(&cpuset).unwrap(), json_str);
        }
    }
}
1424