• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 pub mod android;
6 pub mod fdt;
7 pub mod pstore;
8 pub mod serial;
9 
10 use std::collections::BTreeMap;
11 use std::error::Error as StdError;
12 use std::fs::File;
13 use std::io::{self, Read, Seek, SeekFrom};
14 use std::path::PathBuf;
15 use std::sync::Arc;
16 
17 use acpi_tables::aml::Aml;
18 use acpi_tables::sdt::SDT;
19 use base::{syslog, AsRawDescriptor, AsRawDescriptors, Event, Tube};
20 use devices::virtio::VirtioDevice;
21 use devices::{
22     BarRange, Bus, BusDevice, BusDeviceObj, BusError, BusResumeDevice, HotPlugBus, IrqChip,
23     PciAddress, PciBridge, PciDevice, PciDeviceError, PciInterruptPin, PciRoot, ProxyDevice,
24     SerialHardware, SerialParameters, VfioPlatformDevice,
25 };
26 use hypervisor::{IoEventAddress, ProtectionType, Vm};
27 use minijail::Minijail;
28 use remain::sorted;
29 use resources::{MmioType, SystemAllocator, SystemAllocatorConfig};
30 use sync::Mutex;
31 use thiserror::Error;
32 use vm_control::{BatControl, BatteryType, PmResource};
33 use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};
34 
35 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
36 use gdbstub_arch::x86::reg::X86_64CoreRegs as GdbStubRegs;
37 
38 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
39 use {
40     devices::IrqChipAArch64 as IrqChipArch,
41     hypervisor::{Hypervisor as HypervisorArch, VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
42 };
43 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
44 use {
45     devices::IrqChipX86_64 as IrqChipArch,
46     hypervisor::{HypervisorX86_64 as HypervisorArch, VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
47 };
48 
49 pub use serial::{
50     add_serial_devices, get_serial_cmdline, set_default_serial_parameters, GetSerialCmdlineError,
51     SERIAL_ADDR,
52 };
53 
/// The boot image handed to the VM, tagged by how it should be treated.
#[derive(Debug)]
pub enum VmImage {
    /// An open file containing a kernel image.
    Kernel(File),
    /// An open file containing a BIOS image.
    Bios(File),
}
58 
/// Description of the persistent-store (pstore) backing requested for the guest.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Pstore {
    /// Path associated with the pstore backing.
    pub path: PathBuf,
    /// Size of the pstore backing.
    // NOTE(review): presumably a byte count — confirm against the consumer in `pstore`.
    pub size: u32,
}
64 
/// Mapping of guest VCPU threads to host CPU cores.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum VcpuAffinity {
    /// All VCPU threads will be pinned to the same set of host CPU cores.
    Global(Vec<usize>),
    /// Each VCPU may be pinned to a set of host CPU cores.
    /// The map key is a guest VCPU index, and the corresponding value is the set of
    /// host CPU indices that the VCPU thread will be allowed to run on.
    /// If a VCPU index is not present in the map, its affinity will not be set.
    PerVcpu(BTreeMap<usize, Vec<usize>>),
}
76 
77 /// Holds the pieces needed to build a VM. Passed to `build_vm` in the `LinuxArch` trait below to
78 /// create a `RunnableLinuxVm`.
79 pub struct VmComponents {
80     pub memory_size: u64,
81     pub swiotlb: Option<u64>,
82     pub vcpu_count: usize,
83     pub vcpu_affinity: Option<VcpuAffinity>,
84     pub cpu_clusters: Vec<Vec<usize>>,
85     pub cpu_capacity: BTreeMap<usize, u32>,
86     pub no_smt: bool,
87     pub hugepages: bool,
88     pub vm_image: VmImage,
89     pub android_fstab: Option<File>,
90     pub pstore: Option<Pstore>,
91     pub initrd_image: Option<File>,
92     pub extra_kernel_params: Vec<String>,
93     pub acpi_sdts: Vec<SDT>,
94     pub rt_cpus: Vec<usize>,
95     pub delay_rt: bool,
96     pub protected_vm: ProtectionType,
97     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
98     pub gdb: Option<(u32, Tube)>, // port and control tube.
99     pub dmi_path: Option<PathBuf>,
100     pub no_legacy: bool,
101     pub host_cpu_topology: bool,
102     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
103     pub force_s2idle: bool,
104     #[cfg(feature = "direct")]
105     pub direct_gpe: Vec<u32>,
106 }
107 
108 /// Holds the elements needed to run a Linux VM. Created by `build_vm`.
109 pub struct RunnableLinuxVm<V: VmArch, Vcpu: VcpuArch> {
110     pub vm: V,
111     pub vcpu_count: usize,
112     /// If vcpus is None, then it's the responsibility of the vcpu thread to create vcpus.
113     /// If it's Some, then `build_vm` already created the vcpus.
114     pub vcpus: Option<Vec<Vcpu>>,
115     pub vcpu_affinity: Option<VcpuAffinity>,
116     pub no_smt: bool,
117     pub irq_chip: Box<dyn IrqChipArch>,
118     pub has_bios: bool,
119     pub io_bus: Arc<Bus>,
120     pub mmio_bus: Arc<Bus>,
121     pub pid_debug_label_map: BTreeMap<u32, String>,
122     pub suspend_evt: Event,
123     pub rt_cpus: Vec<usize>,
124     pub delay_rt: bool,
125     pub bat_control: Option<BatControl>,
126     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
127     pub gdb: Option<(u32, Tube)>,
128     pub pm: Option<Arc<Mutex<dyn PmResource>>>,
129     /// Devices to be notified before the system resumes from the S3 suspended state.
130     pub resume_notify_devices: Vec<Arc<Mutex<dyn BusResumeDevice>>>,
131     pub root_config: Arc<Mutex<PciRoot>>,
132     pub hotplug_bus: Vec<Arc<Mutex<dyn HotPlugBus>>>,
133 }
134 
135 /// The device and optional jail.
136 pub struct VirtioDeviceStub {
137     pub dev: Box<dyn VirtioDevice>,
138     pub jail: Option<Minijail>,
139 }
140 
141 /// Trait which is implemented for each Linux Architecture in order to
142 /// set up the memory, cpus, and system devices and to boot the kernel.
143 pub trait LinuxArch {
144     type Error: StdError;
145 
146     /// Returns a Vec of the valid memory addresses as pairs of address and length. These should be
147     /// used to configure the `GuestMemory` structure for the platform.
148     ///
149     /// # Arguments
150     ///
151     /// * `components` - Parts used to determine the memory layout.
guest_memory_layout( components: &VmComponents, ) -> std::result::Result<Vec<(GuestAddress, u64)>, Self::Error>152     fn guest_memory_layout(
153         components: &VmComponents,
154     ) -> std::result::Result<Vec<(GuestAddress, u64)>, Self::Error>;
155 
156     /// Gets the configuration for a new `SystemAllocator` that fits the given `Vm`'s memory layout.
157     ///
158     /// This is the per-architecture template for constructing the `SystemAllocator`. Platform
159     /// agnostic modifications may be made to this configuration, but the final `SystemAllocator`
160     /// will be at least as strict as this configuration.
161     ///
162     /// # Arguments
163     ///
164     /// * `vm` - The virtual machine to be used as a template for the `SystemAllocator`.
get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig165     fn get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig;
166 
167     /// Takes `VmComponents` and generates a `RunnableLinuxVm`.
168     ///
169     /// # Arguments
170     ///
171     /// * `components` - Parts to use to build the VM.
172     /// * `exit_evt` - Event used by sub-devices to request that crosvm exit because guest
173     ///     wants to stop/shut down.
174     /// * `reset_evt` - Event used by sub-devices to request that crosvm exit because guest
175     ///     requested reset.
176     /// * `system_allocator` - Allocator created by this trait's implementation of
177     ///   `get_system_allocator_config`.
178     /// * `serial_parameters` - Definitions for how the serial devices should be configured.
179     /// * `serial_jail` - Jail used for serial devices created here.
180     /// * `battery` - Defines what battery device will be created.
181     /// * `vm` - A VM implementation to build upon.
182     /// * `ramoops_region` - Region allocated for ramoops.
183     /// * `devices` - The devices to be built into the VM.
184     /// * `irq_chip` - The IRQ chip implemention for the VM.
build_vm<V, Vcpu>( components: VmComponents, exit_evt: &Event, reset_evt: &Event, system_allocator: &mut SystemAllocator, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, battery: (&Option<BatteryType>, Option<Minijail>), vm: V, ramoops_region: Option<pstore::RamoopsRegion>, devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>, irq_chip: &mut dyn IrqChipArch, kvm_vcpu_ids: &mut Vec<usize>, ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error> where V: VmArch, Vcpu: VcpuArch185     fn build_vm<V, Vcpu>(
186         components: VmComponents,
187         exit_evt: &Event,
188         reset_evt: &Event,
189         system_allocator: &mut SystemAllocator,
190         serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
191         serial_jail: Option<Minijail>,
192         battery: (&Option<BatteryType>, Option<Minijail>),
193         vm: V,
194         ramoops_region: Option<pstore::RamoopsRegion>,
195         devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
196         irq_chip: &mut dyn IrqChipArch,
197         kvm_vcpu_ids: &mut Vec<usize>,
198     ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
199     where
200         V: VmArch,
201         Vcpu: VcpuArch;
202 
203     /// Configures the vcpu and should be called once per vcpu from the vcpu's thread.
204     ///
205     /// # Arguments
206     ///
207     /// * `vm` - The virtual machine object.
208     /// * `hypervisor` - The `Hypervisor` that created the vcpu.
209     /// * `irq_chip` - The `IrqChip` associated with this vm.
210     /// * `vcpu` - The VCPU object to configure.
211     /// * `vcpu_id` - The id of the given `vcpu`.
212     /// * `num_cpus` - Number of virtual CPUs the guest will have.
213     /// * `has_bios` - Whether the `VmImage` is a `Bios` image
configure_vcpu<V: Vm>( vm: &V, hypervisor: &dyn HypervisorArch, irq_chip: &mut dyn IrqChipArch, vcpu: &mut dyn VcpuArch, vcpu_id: usize, num_cpus: usize, has_bios: bool, no_smt: bool, host_cpu_topology: bool, ) -> Result<(), Self::Error>214     fn configure_vcpu<V: Vm>(
215         vm: &V,
216         hypervisor: &dyn HypervisorArch,
217         irq_chip: &mut dyn IrqChipArch,
218         vcpu: &mut dyn VcpuArch,
219         vcpu_id: usize,
220         num_cpus: usize,
221         has_bios: bool,
222         no_smt: bool,
223         host_cpu_topology: bool,
224     ) -> Result<(), Self::Error>;
225 
226     /// Configures and add a pci device into vm
register_pci_device<V: VmArch, Vcpu: VcpuArch>( linux: &mut RunnableLinuxVm<V, Vcpu>, device: Box<dyn PciDevice>, minijail: Option<Minijail>, resources: &mut SystemAllocator, ) -> Result<PciAddress, Self::Error>227     fn register_pci_device<V: VmArch, Vcpu: VcpuArch>(
228         linux: &mut RunnableLinuxVm<V, Vcpu>,
229         device: Box<dyn PciDevice>,
230         minijail: Option<Minijail>,
231         resources: &mut SystemAllocator,
232     ) -> Result<PciAddress, Self::Error>;
233 
234     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
235     /// Reads vCPU's registers.
debug_read_registers<T: VcpuArch>(vcpu: &T) -> Result<GdbStubRegs, Self::Error>236     fn debug_read_registers<T: VcpuArch>(vcpu: &T) -> Result<GdbStubRegs, Self::Error>;
237 
238     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
239     /// Writes vCPU's registers.
debug_write_registers<T: VcpuArch>(vcpu: &T, regs: &GdbStubRegs) -> Result<(), Self::Error>240     fn debug_write_registers<T: VcpuArch>(vcpu: &T, regs: &GdbStubRegs) -> Result<(), Self::Error>;
241 
242     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
243     /// Reads bytes from the guest memory.
debug_read_memory<T: VcpuArch>( vcpu: &T, guest_mem: &GuestMemory, vaddr: GuestAddress, len: usize, ) -> Result<Vec<u8>, Self::Error>244     fn debug_read_memory<T: VcpuArch>(
245         vcpu: &T,
246         guest_mem: &GuestMemory,
247         vaddr: GuestAddress,
248         len: usize,
249     ) -> Result<Vec<u8>, Self::Error>;
250 
251     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
252     /// Writes bytes to the specified guest memory.
debug_write_memory<T: VcpuArch>( vcpu: &T, guest_mem: &GuestMemory, vaddr: GuestAddress, buf: &[u8], ) -> Result<(), Self::Error>253     fn debug_write_memory<T: VcpuArch>(
254         vcpu: &T,
255         guest_mem: &GuestMemory,
256         vaddr: GuestAddress,
257         buf: &[u8],
258     ) -> Result<(), Self::Error>;
259 
260     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
261     /// Make the next vCPU's run single-step.
debug_enable_singlestep<T: VcpuArch>(vcpu: &T) -> Result<(), Self::Error>262     fn debug_enable_singlestep<T: VcpuArch>(vcpu: &T) -> Result<(), Self::Error>;
263 
264     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
265     /// Set hardware breakpoints at the given addresses.
debug_set_hw_breakpoints<T: VcpuArch>( vcpu: &T, breakpoints: &[GuestAddress], ) -> Result<(), Self::Error>266     fn debug_set_hw_breakpoints<T: VcpuArch>(
267         vcpu: &T,
268         breakpoints: &[GuestAddress],
269     ) -> Result<(), Self::Error>;
270 }
271 
272 /// Errors for device manager.
273 #[sorted]
274 #[derive(Error, Debug)]
275 pub enum DeviceRegistrationError {
276     /// No more MMIO space available.
277     #[error("no more addresses are available")]
278     AddrsExhausted,
279     /// Could not allocate device address space for the device.
280     #[error("Allocating device addresses: {0}")]
281     AllocateDeviceAddrs(PciDeviceError),
282     /// Could not allocate IO space for the device.
283     #[error("Allocating IO addresses: {0}")]
284     AllocateIoAddrs(PciDeviceError),
285     /// Could not allocate MMIO or IO resource for the device.
286     #[error("Allocating IO resource: {0}")]
287     AllocateIoResource(resources::Error),
288     /// Could not allocate an IRQ number.
289     #[error("Allocating IRQ number")]
290     AllocateIrq,
291     /// Could not allocate IRQ resource for the device.
292     #[error("Allocating IRQ resource: {0}")]
293     AllocateIrqResource(devices::vfio::VfioError),
294     /// Unable to clone a jail for the device.
295     #[error("failed to clone jail: {0}")]
296     CloneJail(minijail::Error),
297     /// Appending to kernel command line failed.
298     #[error("unable to add device to kernel command line: {0}")]
299     Cmdline(kernel_cmdline::Error),
300     /// Configure window size failed.
301     #[error("failed to configure window size: {0}")]
302     ConfigureWindowSize(PciDeviceError),
303     // Unable to create a pipe.
304     #[error("failed to create pipe: {0}")]
305     CreatePipe(base::Error),
306     // Unable to create serial device from serial parameters
307     #[error("failed to create serial device: {0}")]
308     CreateSerialDevice(devices::SerialError),
309     // Unable to create tube
310     #[error("failed to create tube: {0}")]
311     CreateTube(base::TubeError),
312     /// Could not clone an event.
313     #[error("failed to clone event: {0}")]
314     EventClone(base::Error),
315     /// Could not create an event.
316     #[error("failed to create event: {0}")]
317     EventCreate(base::Error),
318     /// No more IRQs are available.
319     #[error("no more IRQs are available")]
320     IrqsExhausted,
321     /// Missing a required serial device.
322     #[error("missing required serial device {0}")]
323     MissingRequiredSerialDevice(u8),
324     /// Could not add a device to the mmio bus.
325     #[error("failed to add to mmio bus: {0}")]
326     MmioInsert(BusError),
327     /// Failed to initialize proxy device for jailed device.
328     #[error("failed to create proxy device: {0}")]
329     ProxyDeviceCreation(devices::ProxyError),
330     /// Failed to register battery device.
331     #[error("failed to register battery device to VM: {0}")]
332     RegisterBattery(devices::BatteryError),
333     /// Could not register PCI device capabilities.
334     #[error("could not register PCI device capabilities: {0}")]
335     RegisterDeviceCapabilities(PciDeviceError),
336     /// Failed to register ioevent with VM.
337     #[error("failed to register ioevent to VM: {0}")]
338     RegisterIoevent(base::Error),
339     /// Failed to register irq event with VM.
340     #[error("failed to register irq event to VM: {0}")]
341     RegisterIrqfd(base::Error),
342     /// Could not setup VFIO platform IRQ for the device.
343     #[error("Setting up VFIO platform IRQ: {0}")]
344     SetupVfioPlatformIrq(anyhow::Error),
345 }
346 
347 /// Config a PCI device for used by this vm.
configure_pci_device<V: VmArch, Vcpu: VcpuArch>( linux: &mut RunnableLinuxVm<V, Vcpu>, mut device: Box<dyn PciDevice>, jail: Option<Minijail>, resources: &mut SystemAllocator, ) -> Result<PciAddress, DeviceRegistrationError>348 pub fn configure_pci_device<V: VmArch, Vcpu: VcpuArch>(
349     linux: &mut RunnableLinuxVm<V, Vcpu>,
350     mut device: Box<dyn PciDevice>,
351     jail: Option<Minijail>,
352     resources: &mut SystemAllocator,
353 ) -> Result<PciAddress, DeviceRegistrationError> {
354     // Allocate PCI device address before allocating BARs.
355     let pci_address = device
356         .allocate_address(resources)
357         .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
358 
359     // Allocate ranges that may need to be in the low MMIO region (MmioType::Low).
360     let mmio_ranges = device
361         .allocate_io_bars(resources)
362         .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
363 
364     // Allocate device ranges that may be in low or high MMIO after low-only ranges.
365     let device_ranges = device
366         .allocate_device_bars(resources)
367         .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
368 
369     // Do not suggest INTx for hot-plug devices.
370     let intx_event = devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
371 
372     if let Some((gsi, _pin)) = device.assign_irq(&intx_event, None) {
373         resources.reserve_irq(gsi);
374 
375         linux
376             .irq_chip
377             .as_irq_chip_mut()
378             .register_level_irq_event(gsi, &intx_event)
379             .map_err(DeviceRegistrationError::RegisterIrqfd)?;
380     }
381 
382     let mut keep_rds = device.keep_rds();
383     syslog::push_descriptors(&mut keep_rds);
384 
385     device
386         .register_device_capabilities()
387         .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
388     for (event, addr, datamatch) in device.ioevents() {
389         let io_addr = IoEventAddress::Mmio(addr);
390         linux
391             .vm
392             .register_ioevent(event, io_addr, datamatch)
393             .map_err(DeviceRegistrationError::RegisterIoevent)?;
394         keep_rds.push(event.as_raw_descriptor());
395     }
396     let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
397         let proxy = ProxyDevice::new(device, &jail, keep_rds)
398             .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
399         linux
400             .pid_debug_label_map
401             .insert(proxy.pid() as u32, proxy.debug_label());
402         Arc::new(Mutex::new(proxy))
403     } else {
404         device.on_sandboxed();
405         Arc::new(Mutex::new(device))
406     };
407 
408     linux
409         .root_config
410         .lock()
411         .add_device(pci_address, arced_dev.clone());
412 
413     for range in &mmio_ranges {
414         linux
415             .mmio_bus
416             .insert(arced_dev.clone(), range.addr, range.size)
417             .map_err(DeviceRegistrationError::MmioInsert)?;
418     }
419 
420     for range in &device_ranges {
421         linux
422             .mmio_bus
423             .insert(arced_dev.clone(), range.addr, range.size)
424             .map_err(DeviceRegistrationError::MmioInsert)?;
425     }
426 
427     Ok(pci_address)
428 }
429 
430 /// Creates a platform device for use by this Vm.
generate_platform_bus( devices: Vec<(VfioPlatformDevice, Option<Minijail>)>, irq_chip: &mut dyn IrqChip, mmio_bus: &Bus, resources: &mut SystemAllocator, ) -> Result<BTreeMap<u32, String>, DeviceRegistrationError>431 pub fn generate_platform_bus(
432     devices: Vec<(VfioPlatformDevice, Option<Minijail>)>,
433     irq_chip: &mut dyn IrqChip,
434     mmio_bus: &Bus,
435     resources: &mut SystemAllocator,
436 ) -> Result<BTreeMap<u32, String>, DeviceRegistrationError> {
437     let mut pid_labels = BTreeMap::new();
438 
439     // Allocate ranges that may need to be in the Platform MMIO region (MmioType::Platform).
440     for (mut device, jail) in devices.into_iter() {
441         let ranges = device
442             .allocate_regions(resources)
443             .map_err(DeviceRegistrationError::AllocateIoResource)?;
444 
445         let mut keep_rds = device.keep_rds();
446         syslog::push_descriptors(&mut keep_rds);
447 
448         let irqs = device
449             .get_platform_irqs()
450             .map_err(DeviceRegistrationError::AllocateIrqResource)?;
451         for irq in irqs.into_iter() {
452             let irq_num = resources
453                 .allocate_irq()
454                 .ok_or(DeviceRegistrationError::AllocateIrq)?;
455 
456             if device.irq_is_automask(&irq) {
457                 let irq_evt =
458                     devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
459                 irq_chip
460                     .register_level_irq_event(irq_num, &irq_evt)
461                     .map_err(DeviceRegistrationError::RegisterIrqfd)?;
462                 device
463                     .assign_level_platform_irq(&irq_evt, irq.index)
464                     .map_err(DeviceRegistrationError::SetupVfioPlatformIrq)?;
465                 keep_rds.extend(irq_evt.as_raw_descriptors());
466             } else {
467                 let irq_evt =
468                     devices::IrqEdgeEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
469                 irq_chip
470                     .register_edge_irq_event(irq_num, &irq_evt)
471                     .map_err(DeviceRegistrationError::RegisterIrqfd)?;
472                 device
473                     .assign_edge_platform_irq(&irq_evt, irq.index)
474                     .map_err(DeviceRegistrationError::SetupVfioPlatformIrq)?;
475                 keep_rds.extend(irq_evt.as_raw_descriptors());
476             }
477         }
478 
479         let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
480             let proxy = ProxyDevice::new(device, &jail, keep_rds)
481                 .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
482             pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
483             Arc::new(Mutex::new(proxy))
484         } else {
485             device.on_sandboxed();
486             Arc::new(Mutex::new(device))
487         };
488         for range in &ranges {
489             mmio_bus
490                 .insert(arced_dev.clone(), range.0, range.1)
491                 .map_err(DeviceRegistrationError::MmioInsert)?;
492         }
493     }
494     Ok(pid_labels)
495 }
496 
497 /// Creates a root PCI device for use by this Vm.
generate_pci_root( mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>, irq_chip: &mut dyn IrqChip, mmio_bus: Arc<Bus>, io_bus: Arc<Bus>, resources: &mut SystemAllocator, vm: &mut impl Vm, max_irqs: usize, ) -> Result< ( PciRoot, Vec<(PciAddress, u32, PciInterruptPin)>, BTreeMap<u32, String>, ), DeviceRegistrationError, >498 pub fn generate_pci_root(
499     mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
500     irq_chip: &mut dyn IrqChip,
501     mmio_bus: Arc<Bus>,
502     io_bus: Arc<Bus>,
503     resources: &mut SystemAllocator,
504     vm: &mut impl Vm,
505     max_irqs: usize,
506 ) -> Result<
507     (
508         PciRoot,
509         Vec<(PciAddress, u32, PciInterruptPin)>,
510         BTreeMap<u32, String>,
511     ),
512     DeviceRegistrationError,
513 > {
514     let mut root = PciRoot::new(Arc::downgrade(&mmio_bus), Arc::downgrade(&io_bus));
515     let mut pid_labels = BTreeMap::new();
516     // The map of (dev_idx, bus), find bus number through dev_idx in devices
517     let mut devid_buses: BTreeMap<usize, u8> = BTreeMap::new();
518     // The map of (bridge secondary bus number, Vec<sub device BarRange>)
519     let mut bridge_bar_ranges: BTreeMap<u8, Vec<BarRange>> = BTreeMap::new();
520 
521     // Allocate PCI device address before allocating BARs.
522     let mut device_addrs = Vec::<PciAddress>::new();
523     for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
524         let address = device
525             .allocate_address(resources)
526             .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
527         device_addrs.push(address);
528 
529         if address.bus > 0 {
530             devid_buses.insert(dev_idx, address.bus);
531         }
532 
533         if PciBridge::is_pci_bridge(device) {
534             let sec_bus = PciBridge::get_secondary_bus_num(device);
535             bridge_bar_ranges.insert(sec_bus, Vec::<BarRange>::new());
536         }
537     }
538 
539     // Allocate ranges that may need to be in the low MMIO region (MmioType::Low).
540     let mut io_ranges = BTreeMap::new();
541     for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
542         let mut ranges = device
543             .allocate_io_bars(resources)
544             .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
545         io_ranges.insert(dev_idx, ranges.clone());
546 
547         if let Some(bus) = devid_buses.get(&dev_idx) {
548             if let Some(bridge_bar) = bridge_bar_ranges.get_mut(bus) {
549                 bridge_bar.append(&mut ranges);
550             }
551         }
552     }
553 
554     // Allocate device ranges that may be in low or high MMIO after low-only ranges.
555     let mut device_ranges = BTreeMap::new();
556     for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
557         let mut ranges = device
558             .allocate_device_bars(resources)
559             .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
560         device_ranges.insert(dev_idx, ranges.clone());
561 
562         if let Some(bus) = devid_buses.get(&dev_idx) {
563             if let Some(bridge_bar) = bridge_bar_ranges.get_mut(bus) {
564                 bridge_bar.append(&mut ranges);
565             }
566         }
567     }
568 
569     for (device, _jail) in devices.iter_mut() {
570         if PciBridge::is_pci_bridge(device) {
571             let sec_bus = PciBridge::get_secondary_bus_num(device);
572             if let Some(bridge_bar) = bridge_bar_ranges.get(&sec_bus) {
573                 device
574                     .configure_bridge_window(resources, bridge_bar)
575                     .map_err(DeviceRegistrationError::ConfigureWindowSize)?;
576             }
577         }
578     }
579 
580     // Allocate legacy INTx
581     let mut pci_irqs = Vec::new();
582     let mut irqs: Vec<Option<u32>> = vec![None; max_irqs];
583 
584     for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
585         // For default interrupt routing use next preallocated interrupt from the pool.
586         let irq_num = if let Some(irq) = irqs[dev_idx % max_irqs] {
587             irq
588         } else {
589             let irq = resources
590                 .allocate_irq()
591                 .ok_or(DeviceRegistrationError::AllocateIrq)?;
592             irqs[dev_idx % max_irqs] = Some(irq);
593             irq
594         };
595 
596         let intx_event =
597             devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
598 
599         if let Some((gsi, pin)) = device.assign_irq(&intx_event, Some(irq_num)) {
600             // reserve INTx if needed and non-default.
601             if gsi != irq_num {
602                 resources.reserve_irq(gsi);
603             };
604             irq_chip
605                 .register_level_irq_event(gsi, &intx_event)
606                 .map_err(DeviceRegistrationError::RegisterIrqfd)?;
607 
608             pci_irqs.push((device_addrs[dev_idx], gsi, pin));
609         }
610     }
611 
612     for (dev_idx, (mut device, jail)) in devices.into_iter().enumerate() {
613         let address = device_addrs[dev_idx];
614 
615         let mut keep_rds = device.keep_rds();
616         syslog::push_descriptors(&mut keep_rds);
617         keep_rds.append(&mut vm.get_memory().as_raw_descriptors());
618 
619         let ranges = io_ranges.remove(&dev_idx).unwrap_or_default();
620         let device_ranges = device_ranges.remove(&dev_idx).unwrap_or_default();
621         device
622             .register_device_capabilities()
623             .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
624         for (event, addr, datamatch) in device.ioevents() {
625             let io_addr = IoEventAddress::Mmio(addr);
626             vm.register_ioevent(event, io_addr, datamatch)
627                 .map_err(DeviceRegistrationError::RegisterIoevent)?;
628             keep_rds.push(event.as_raw_descriptor());
629         }
630 
631         let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
632             let proxy = ProxyDevice::new(device, &jail, keep_rds)
633                 .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
634             pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
635             Arc::new(Mutex::new(proxy))
636         } else {
637             device.on_sandboxed();
638             Arc::new(Mutex::new(device))
639         };
640         root.add_device(address, arced_dev.clone());
641         for range in &ranges {
642             mmio_bus
643                 .insert(arced_dev.clone(), range.addr, range.size)
644                 .map_err(DeviceRegistrationError::MmioInsert)?;
645         }
646 
647         for range in &device_ranges {
648             mmio_bus
649                 .insert(arced_dev.clone(), range.addr, range.size)
650                 .map_err(DeviceRegistrationError::MmioInsert)?;
651         }
652     }
653     Ok((root, pci_irqs, pid_labels))
654 }
655 
656 /// Adds goldfish battery
657 /// return the platform needed resouces include its AML data, irq number
658 ///
659 /// # Arguments
660 ///
661 /// * `amls` - the vector to put the goldfish battery AML
662 /// * `battery_jail` - used when sandbox is enabled
663 /// * `mmio_bus` - bus to add the devices to
664 /// * `irq_chip` - the IrqChip object for registering irq events
665 /// * `irq_num` - assigned interrupt to use
666 /// * `resources` - the SystemAllocator to allocate IO and MMIO for acpi
add_goldfish_battery( amls: &mut Vec<u8>, battery_jail: Option<Minijail>, mmio_bus: &Bus, irq_chip: &mut dyn IrqChip, irq_num: u32, resources: &mut SystemAllocator, ) -> Result<Tube, DeviceRegistrationError>667 pub fn add_goldfish_battery(
668     amls: &mut Vec<u8>,
669     battery_jail: Option<Minijail>,
670     mmio_bus: &Bus,
671     irq_chip: &mut dyn IrqChip,
672     irq_num: u32,
673     resources: &mut SystemAllocator,
674 ) -> Result<Tube, DeviceRegistrationError> {
675     let alloc = resources.get_anon_alloc();
676     let mmio_base = resources
677         .mmio_allocator(MmioType::Low)
678         .allocate_with_align(
679             devices::bat::GOLDFISHBAT_MMIO_LEN,
680             alloc,
681             "GoldfishBattery".to_string(),
682             devices::bat::GOLDFISHBAT_MMIO_LEN,
683         )
684         .map_err(DeviceRegistrationError::AllocateIoResource)?;
685 
686     let irq_evt = devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
687 
688     irq_chip
689         .register_level_irq_event(irq_num, &irq_evt)
690         .map_err(DeviceRegistrationError::RegisterIrqfd)?;
691 
692     let (control_tube, response_tube) =
693         Tube::pair().map_err(DeviceRegistrationError::CreateTube)?;
694 
695     #[cfg(feature = "power-monitor-powerd")]
696     let create_monitor = Some(Box::new(power_monitor::powerd::DBusMonitor::connect)
697         as Box<dyn power_monitor::CreatePowerMonitorFn>);
698 
699     #[cfg(not(feature = "power-monitor-powerd"))]
700     let create_monitor = None;
701 
702     let goldfish_bat =
703         devices::GoldfishBattery::new(mmio_base, irq_num, irq_evt, response_tube, create_monitor)
704             .map_err(DeviceRegistrationError::RegisterBattery)?;
705     goldfish_bat.to_aml_bytes(amls);
706 
707     match battery_jail.as_ref() {
708         Some(jail) => {
709             let mut keep_rds = goldfish_bat.keep_rds();
710             syslog::push_fds(&mut keep_rds);
711             mmio_bus
712                 .insert(
713                     Arc::new(Mutex::new(
714                         ProxyDevice::new(goldfish_bat, jail, keep_rds)
715                             .map_err(DeviceRegistrationError::ProxyDeviceCreation)?,
716                     )),
717                     mmio_base,
718                     devices::bat::GOLDFISHBAT_MMIO_LEN,
719                 )
720                 .map_err(DeviceRegistrationError::MmioInsert)?;
721         }
722         None => {
723             mmio_bus
724                 .insert(
725                     Arc::new(Mutex::new(goldfish_bat)),
726                     mmio_base,
727                     devices::bat::GOLDFISHBAT_MMIO_LEN,
728                 )
729                 .map_err(DeviceRegistrationError::MmioInsert)?;
730         }
731     }
732 
733     Ok(control_tube)
734 }
735 
/// Errors for image loading.
///
/// `#[sorted]` (from the `remain` crate) checks at compile time that the variants stay in
/// sorted order, so new variants must be inserted alphabetically.
#[sorted]
#[derive(Error, Debug)]
pub enum LoadImageError {
    /// The requested alignment (the carried value) is not a power of two.
    #[error("Alignment not a power of two: {0}")]
    BadAlignment(u64),
    /// The image size in bytes (the carried value) exceeds the space available for it, or
    /// does not fit in `usize`.
    #[error("Image size too large: {0}")]
    ImageSizeTooLarge(u64),
    /// Copying the image contents into guest memory failed.
    #[error("Reading image into memory failed: {0}")]
    ReadToMemory(GuestMemoryError),
    /// Seeking within the image (to find its size or to rewind it) failed.
    #[error("Seek failed: {0}")]
    Seek(io::Error),
}
749 
750 /// Load an image from a file into guest memory.
751 ///
752 /// # Arguments
753 ///
754 /// * `guest_mem` - The memory to be used by the guest.
755 /// * `guest_addr` - The starting address to load the image in the guest memory.
756 /// * `max_size` - The amount of space in bytes available in the guest memory for the image.
757 /// * `image` - The file containing the image to be loaded.
758 ///
759 /// The size in bytes of the loaded image is returned.
load_image<F>( guest_mem: &GuestMemory, image: &mut F, guest_addr: GuestAddress, max_size: u64, ) -> Result<usize, LoadImageError> where F: Read + Seek + AsRawDescriptor,760 pub fn load_image<F>(
761     guest_mem: &GuestMemory,
762     image: &mut F,
763     guest_addr: GuestAddress,
764     max_size: u64,
765 ) -> Result<usize, LoadImageError>
766 where
767     F: Read + Seek + AsRawDescriptor,
768 {
769     let size = image.seek(SeekFrom::End(0)).map_err(LoadImageError::Seek)?;
770 
771     if size > usize::max_value() as u64 || size > max_size {
772         return Err(LoadImageError::ImageSizeTooLarge(size));
773     }
774 
775     // This is safe due to the bounds check above.
776     let size = size as usize;
777 
778     image
779         .seek(SeekFrom::Start(0))
780         .map_err(LoadImageError::Seek)?;
781 
782     guest_mem
783         .read_to_memory(guest_addr, image, size)
784         .map_err(LoadImageError::ReadToMemory)?;
785 
786     Ok(size)
787 }
788 
789 /// Load an image from a file into guest memory at the highest possible address.
790 ///
791 /// # Arguments
792 ///
793 /// * `guest_mem` - The memory to be used by the guest.
794 /// * `image` - The file containing the image to be loaded.
795 /// * `min_guest_addr` - The minimum address of the start of the image.
796 /// * `max_guest_addr` - The address to load the last byte of the image.
797 /// * `align` - The minimum alignment of the start address of the image in bytes
798 ///   (must be a power of two).
799 ///
800 /// The guest address and size in bytes of the loaded image are returned.
load_image_high<F>( guest_mem: &GuestMemory, image: &mut F, min_guest_addr: GuestAddress, max_guest_addr: GuestAddress, align: u64, ) -> Result<(GuestAddress, usize), LoadImageError> where F: Read + Seek + AsRawDescriptor,801 pub fn load_image_high<F>(
802     guest_mem: &GuestMemory,
803     image: &mut F,
804     min_guest_addr: GuestAddress,
805     max_guest_addr: GuestAddress,
806     align: u64,
807 ) -> Result<(GuestAddress, usize), LoadImageError>
808 where
809     F: Read + Seek + AsRawDescriptor,
810 {
811     if !align.is_power_of_two() {
812         return Err(LoadImageError::BadAlignment(align));
813     }
814 
815     let max_size = max_guest_addr.offset_from(min_guest_addr) & !(align - 1);
816     let size = image.seek(SeekFrom::End(0)).map_err(LoadImageError::Seek)?;
817 
818     if size > usize::max_value() as u64 || size > max_size {
819         return Err(LoadImageError::ImageSizeTooLarge(size));
820     }
821 
822     image
823         .seek(SeekFrom::Start(0))
824         .map_err(LoadImageError::Seek)?;
825 
826     // Load image at the maximum aligned address allowed.
827     // The subtraction cannot underflow because of the size checks above.
828     let guest_addr = GuestAddress((max_guest_addr.offset() - size) & !(align - 1));
829 
830     // This is safe due to the bounds check above.
831     let size = size as usize;
832 
833     guest_mem
834         .read_to_memory(guest_addr, image, size)
835         .map_err(LoadImageError::ReadToMemory)?;
836 
837     Ok((guest_addr, size))
838 }
839