// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

pub mod android;
pub mod fdt;
pub mod pstore;
pub mod serial;

use std::collections::BTreeMap;
use std::error::Error as StdError;
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom};
use std::path::PathBuf;
use std::sync::Arc;

use acpi_tables::aml::Aml;
use acpi_tables::sdt::SDT;
use base::{syslog, AsRawDescriptor, AsRawDescriptors, Event, Tube};
use devices::virtio::VirtioDevice;
use devices::{
    BarRange, Bus, BusDevice, BusDeviceObj, BusError, BusResumeDevice, HotPlugBus, IrqChip,
    PciAddress, PciBridge, PciDevice, PciDeviceError, PciInterruptPin, PciRoot, ProxyDevice,
    SerialHardware, SerialParameters, VfioPlatformDevice,
};
use hypervisor::{IoEventAddress, ProtectionType, Vm};
use minijail::Minijail;
use remain::sorted;
use resources::{MmioType, SystemAllocator, SystemAllocatorConfig};
use sync::Mutex;
use thiserror::Error;
use vm_control::{BatControl, BatteryType, PmResource};
use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};

#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
use gdbstub_arch::x86::reg::X86_64CoreRegs as GdbStubRegs;

#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
use {
    devices::IrqChipAArch64 as IrqChipArch,
    hypervisor::{Hypervisor as HypervisorArch, VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
};
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use {
    devices::IrqChipX86_64 as IrqChipArch,
    hypervisor::{HypervisorX86_64 as HypervisorArch, VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
};

pub use serial::{
    add_serial_devices, get_serial_cmdline, set_default_serial_parameters, GetSerialCmdlineError,
    SERIAL_ADDR,
};

pub enum VmImage {
    Kernel(File),
    Bios(File),
}

#[derive(Clone)]
pub struct Pstore {
    pub path: PathBuf,
    pub size: u32,
}

/// Mapping of guest VCPU threads to host CPU cores.
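///
/// A minimal sketch of how the two variants are typically populated (illustrative only; the
/// concrete core numbers are hypothetical):
///
/// ```ignore
/// use std::collections::BTreeMap;
///
/// // Pin every VCPU thread to host cores 0-3.
/// let global = VcpuAffinity::Global(vec![0, 1, 2, 3]);
///
/// // Pin VCPU 0 to host core 0 and VCPU 1 to host cores 2-3; other VCPUs keep the default
/// // affinity because they are absent from the map.
/// let mut map = BTreeMap::new();
/// map.insert(0, vec![0]);
/// map.insert(1, vec![2, 3]);
/// let per_vcpu = VcpuAffinity::PerVcpu(map);
/// ```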
#[derive(Clone, Debug, PartialEq)]
pub enum VcpuAffinity {
    /// All VCPU threads will be pinned to the same set of host CPU cores.
    Global(Vec<usize>),
    /// Each VCPU may be pinned to a set of host CPU cores.
    /// The map key is a guest VCPU index, and the corresponding value is the set of
    /// host CPU indices that the VCPU thread will be allowed to run on.
    /// If a VCPU index is not present in the map, its affinity will not be set.
    PerVcpu(BTreeMap<usize, Vec<usize>>),
}

/// Holds the pieces needed to build a VM. Passed to `build_vm` in the `LinuxArch` trait below to
/// create a `RunnableLinuxVm`.
pub struct VmComponents {
    pub memory_size: u64,
    pub swiotlb: Option<u64>,
    pub vcpu_count: usize,
    pub vcpu_affinity: Option<VcpuAffinity>,
    pub cpu_clusters: Vec<Vec<usize>>,
    pub cpu_capacity: BTreeMap<usize, u32>,
    pub no_smt: bool,
    pub hugepages: bool,
    pub vm_image: VmImage,
    pub android_fstab: Option<File>,
    pub pstore: Option<Pstore>,
    pub initrd_image: Option<File>,
    pub extra_kernel_params: Vec<String>,
    pub acpi_sdts: Vec<SDT>,
    pub rt_cpus: Vec<usize>,
    pub delay_rt: bool,
    pub protected_vm: ProtectionType,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    pub gdb: Option<(u32, Tube)>, // port and control tube.
    pub dmi_path: Option<PathBuf>,
    pub no_legacy: bool,
    pub host_cpu_topology: bool,
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub force_s2idle: bool,
    #[cfg(feature = "direct")]
    pub direct_gpe: Vec<u32>,
}

/// Holds the elements needed to run a Linux VM. Created by `build_vm`.
pub struct RunnableLinuxVm<V: VmArch, Vcpu: VcpuArch> {
    pub vm: V,
    pub vcpu_count: usize,
    /// If vcpus is None, then it's the responsibility of the vcpu thread to create vcpus.
    /// If it's Some, then `build_vm` already created the vcpus.
    pub vcpus: Option<Vec<Vcpu>>,
    pub vcpu_affinity: Option<VcpuAffinity>,
    pub no_smt: bool,
    pub irq_chip: Box<dyn IrqChipArch>,
    pub has_bios: bool,
    pub io_bus: Arc<Bus>,
    pub mmio_bus: Arc<Bus>,
    pub pid_debug_label_map: BTreeMap<u32, String>,
    pub suspend_evt: Event,
    pub rt_cpus: Vec<usize>,
    pub delay_rt: bool,
    pub bat_control: Option<BatControl>,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    pub gdb: Option<(u32, Tube)>,
    pub pm: Option<Arc<Mutex<dyn PmResource>>>,
    /// Devices to be notified before the system resumes from the S3 suspended state.
    pub resume_notify_devices: Vec<Arc<Mutex<dyn BusResumeDevice>>>,
    pub root_config: Arc<Mutex<PciRoot>>,
    pub hotplug_bus: Vec<Arc<Mutex<dyn HotPlugBus>>>,
}

/// The device and optional jail.
pub struct VirtioDeviceStub {
    pub dev: Box<dyn VirtioDevice>,
    pub jail: Option<Minijail>,
}

/// Trait which is implemented for each Linux architecture in order to
/// set up the memory, CPUs, and system devices, and to boot the kernel.
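///
/// A rough sketch of the order in which the trait is typically exercised (illustrative only;
/// `MyArch` is a hypothetical implementor and the surrounding setup is elided):
///
/// ```ignore
/// // Decide the guest memory layout for the platform and create guest memory from it.
/// let layout = MyArch::guest_memory_layout(&components)?;
/// let guest_mem = GuestMemory::new(&layout)?;
///
/// // Build a SystemAllocator from the per-architecture template.
/// let allocator_config = MyArch::get_system_allocator_config(&vm);
///
/// // Construct the runnable VM with its buses, devices, and IRQ chip.
/// let linux = MyArch::build_vm(components, &exit_evt, &reset_evt, &mut allocator, /* ... */)?;
///
/// // Finally, each VCPU thread configures its own VCPU before running it.
/// MyArch::configure_vcpu(&linux.vm, hypervisor, irq_chip, vcpu, vcpu_id, num_cpus, /* ... */)?;
/// ```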
pub trait LinuxArch {
    type Error: StdError;

    /// Returns a Vec of the valid memory addresses as pairs of address and length. These should be
    /// used to configure the `GuestMemory` structure for the platform.
    ///
    /// # Arguments
    ///
    /// * `components` - Parts used to determine the memory layout.
    fn guest_memory_layout(
        components: &VmComponents,
    ) -> std::result::Result<Vec<(GuestAddress, u64)>, Self::Error>;

    /// Gets the configuration for a new `SystemAllocator` that fits the given `Vm`'s memory layout.
    ///
    /// This is the per-architecture template for constructing the `SystemAllocator`. Platform
    /// agnostic modifications may be made to this configuration, but the final `SystemAllocator`
    /// will be at least as strict as this configuration.
    ///
    /// # Arguments
    ///
    /// * `vm` - The virtual machine to be used as a template for the `SystemAllocator`.
    fn get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig;

    /// Takes `VmComponents` and generates a `RunnableLinuxVm`.
    ///
    /// # Arguments
    ///
    /// * `components` - Parts to use to build the VM.
    /// * `exit_evt` - Event used by sub-devices to request that crosvm exit because the guest
    ///   wants to stop/shut down.
    /// * `reset_evt` - Event used by sub-devices to request that crosvm exit because the guest
    ///   requested a reset.
    /// * `system_allocator` - Allocator created by this trait's implementation of
    ///   `get_system_allocator_config`.
    /// * `serial_parameters` - Definitions for how the serial devices should be configured.
    /// * `serial_jail` - Jail used for serial devices created here.
    /// * `battery` - Defines what battery device will be created.
    /// * `vm` - A VM implementation to build upon.
    /// * `ramoops_region` - Region allocated for ramoops.
    /// * `devices` - The devices to be built into the VM.
    /// * `irq_chip` - The IRQ chip implementation for the VM.
    fn build_vm<V, Vcpu>(
        components: VmComponents,
        exit_evt: &Event,
        reset_evt: &Event,
        system_allocator: &mut SystemAllocator,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        battery: (&Option<BatteryType>, Option<Minijail>),
        vm: V,
        ramoops_region: Option<pstore::RamoopsRegion>,
        devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
        irq_chip: &mut dyn IrqChipArch,
        kvm_vcpu_ids: &mut Vec<usize>,
    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
    where
        V: VmArch,
        Vcpu: VcpuArch;

    /// Configures the VCPU; this should be called once per VCPU from the VCPU's thread.
    ///
    /// # Arguments
    ///
    /// * `vm` - The virtual machine object.
    /// * `hypervisor` - The `Hypervisor` that created the vcpu.
    /// * `irq_chip` - The `IrqChip` associated with this vm.
    /// * `vcpu` - The VCPU object to configure.
    /// * `vcpu_id` - The id of the given `vcpu`.
    /// * `num_cpus` - Number of virtual CPUs the guest will have.
    /// * `has_bios` - Whether the `VmImage` is a `Bios` image.
    fn configure_vcpu<V: Vm>(
        vm: &V,
        hypervisor: &dyn HypervisorArch,
        irq_chip: &mut dyn IrqChipArch,
        vcpu: &mut dyn VcpuArch,
        vcpu_id: usize,
        num_cpus: usize,
        has_bios: bool,
        no_smt: bool,
        host_cpu_topology: bool,
    ) -> Result<(), Self::Error>;

    /// Configures and adds a PCI device to the VM.
    fn register_pci_device<V: VmArch, Vcpu: VcpuArch>(
        linux: &mut RunnableLinuxVm<V, Vcpu>,
        device: Box<dyn PciDevice>,
        minijail: Option<Minijail>,
        resources: &mut SystemAllocator,
    ) -> Result<PciAddress, Self::Error>;

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    /// Reads vCPU's registers.
    fn debug_read_registers<T: VcpuArch>(vcpu: &T) -> Result<GdbStubRegs, Self::Error>;

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    /// Writes vCPU's registers.
    fn debug_write_registers<T: VcpuArch>(vcpu: &T, regs: &GdbStubRegs) -> Result<(), Self::Error>;

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    /// Reads bytes from the guest memory.
    fn debug_read_memory<T: VcpuArch>(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        len: usize,
    ) -> Result<Vec<u8>, Self::Error>;

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    /// Writes bytes to the specified guest memory.
    fn debug_write_memory<T: VcpuArch>(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        buf: &[u8],
    ) -> Result<(), Self::Error>;

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    /// Enables single-step for the vCPU's next run.
    fn debug_enable_singlestep<T: VcpuArch>(vcpu: &T) -> Result<(), Self::Error>;

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    /// Sets hardware breakpoints at the given addresses.
    fn debug_set_hw_breakpoints<T: VcpuArch>(
        vcpu: &T,
        breakpoints: &[GuestAddress],
    ) -> Result<(), Self::Error>;
}

/// Errors for device manager.
#[sorted]
#[derive(Error, Debug)]
pub enum DeviceRegistrationError {
    /// No more MMIO space available.
    #[error("no more addresses are available")]
    AddrsExhausted,
    /// Could not allocate device address space for the device.
    #[error("Allocating device addresses: {0}")]
    AllocateDeviceAddrs(PciDeviceError),
    /// Could not allocate IO space for the device.
    #[error("Allocating IO addresses: {0}")]
    AllocateIoAddrs(PciDeviceError),
    /// Could not allocate MMIO or IO resource for the device.
    #[error("Allocating IO resource: {0}")]
    AllocateIoResource(resources::Error),
    /// Could not allocate an IRQ number.
    #[error("Allocating IRQ number")]
    AllocateIrq,
    /// Could not allocate IRQ resource for the device.
    #[error("Allocating IRQ resource: {0}")]
    AllocateIrqResource(devices::vfio::VfioError),
    /// Unable to clone a jail for the device.
    #[error("failed to clone jail: {0}")]
    CloneJail(minijail::Error),
    /// Appending to kernel command line failed.
    #[error("unable to add device to kernel command line: {0}")]
    Cmdline(kernel_cmdline::Error),
    /// Configure window size failed.
    #[error("failed to configure window size: {0}")]
    ConfigureWindowSize(PciDeviceError),
    /// Unable to create a pipe.
    #[error("failed to create pipe: {0}")]
    CreatePipe(base::Error),
    /// Unable to create a serial device from the serial parameters.
    #[error("failed to create serial device: {0}")]
    CreateSerialDevice(devices::SerialError),
    /// Unable to create a tube.
    #[error("failed to create tube: {0}")]
    CreateTube(base::TubeError),
    /// Could not clone an event.
    #[error("failed to clone event: {0}")]
    EventClone(base::Error),
    /// Could not create an event.
    #[error("failed to create event: {0}")]
    EventCreate(base::Error),
    /// No more IRQs are available.
    #[error("no more IRQs are available")]
    IrqsExhausted,
    /// Missing a required serial device.
    #[error("missing required serial device {0}")]
    MissingRequiredSerialDevice(u8),
    /// Could not add a device to the mmio bus.
    #[error("failed to add to mmio bus: {0}")]
    MmioInsert(BusError),
    /// Failed to initialize proxy device for jailed device.
    #[error("failed to create proxy device: {0}")]
    ProxyDeviceCreation(devices::ProxyError),
    /// Failed to register battery device.
    #[error("failed to register battery device to VM: {0}")]
    RegisterBattery(devices::BatteryError),
    /// Could not register PCI device capabilities.
    #[error("could not register PCI device capabilities: {0}")]
    RegisterDeviceCapabilities(PciDeviceError),
    /// Failed to register ioevent with VM.
    #[error("failed to register ioevent to VM: {0}")]
    RegisterIoevent(base::Error),
    /// Failed to register irq event with VM.
    #[error("failed to register irq event to VM: {0}")]
    RegisterIrqfd(base::Error),
    /// Could not setup VFIO platform IRQ for the device.
    #[error("Setting up VFIO platform IRQ: {0}")]
    SetupVfioPlatformIrq(anyhow::Error),
}

/// Configures a PCI device for use by this VM.
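///
/// A minimal usage sketch (illustrative only; `linux`, `my_device`, and `resources` are
/// hypothetical values set up by the caller):
///
/// ```ignore
/// // Register an already-constructed PCI device, optionally jailed, and get its PCI address.
/// let pci_address = configure_pci_device(&mut linux, my_device, /* jail= */ None, &mut resources)?;
/// ```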
pub fn configure_pci_device<V: VmArch, Vcpu: VcpuArch>(
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    mut device: Box<dyn PciDevice>,
    jail: Option<Minijail>,
    resources: &mut SystemAllocator,
) -> Result<PciAddress, DeviceRegistrationError> {
    // Allocate PCI device address before allocating BARs.
    let pci_address = device
        .allocate_address(resources)
        .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;

    // Allocate ranges that may need to be in the low MMIO region (MmioType::Low).
    let mmio_ranges = device
        .allocate_io_bars(resources)
        .map_err(DeviceRegistrationError::AllocateIoAddrs)?;

    // Allocate device ranges that may be in low or high MMIO after low-only ranges.
    let device_ranges = device
        .allocate_device_bars(resources)
        .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;

    // Do not suggest INTx for hot-plug devices.
    let intx_event = devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;

    if let Some((gsi, _pin)) = device.assign_irq(&intx_event, None) {
        resources.reserve_irq(gsi);

        linux
            .irq_chip
            .as_irq_chip_mut()
            .register_level_irq_event(gsi, &intx_event)
            .map_err(DeviceRegistrationError::RegisterIrqfd)?;
    }

    let mut keep_rds = device.keep_rds();
    syslog::push_descriptors(&mut keep_rds);

    device
        .register_device_capabilities()
        .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
    for (event, addr, datamatch) in device.ioevents() {
        let io_addr = IoEventAddress::Mmio(addr);
        linux
            .vm
            .register_ioevent(event, io_addr, datamatch)
            .map_err(DeviceRegistrationError::RegisterIoevent)?;
        keep_rds.push(event.as_raw_descriptor());
    }
    let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
        let proxy = ProxyDevice::new(device, &jail, keep_rds)
            .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
        linux
            .pid_debug_label_map
            .insert(proxy.pid() as u32, proxy.debug_label());
        Arc::new(Mutex::new(proxy))
    } else {
        device.on_sandboxed();
        Arc::new(Mutex::new(device))
    };

    linux
        .root_config
        .lock()
        .add_device(pci_address, arced_dev.clone());

    for range in &mmio_ranges {
        linux
            .mmio_bus
            .insert(arced_dev.clone(), range.addr, range.size)
            .map_err(DeviceRegistrationError::MmioInsert)?;
    }

    for range in &device_ranges {
        linux
            .mmio_bus
            .insert(arced_dev.clone(), range.addr, range.size)
            .map_err(DeviceRegistrationError::MmioInsert)?;
    }

    Ok(pci_address)
}

/// Creates platform devices for use by this VM.
pub fn generate_platform_bus(
    devices: Vec<(VfioPlatformDevice, Option<Minijail>)>,
    irq_chip: &mut dyn IrqChip,
    mmio_bus: &Bus,
    resources: &mut SystemAllocator,
) -> Result<BTreeMap<u32, String>, DeviceRegistrationError> {
    let mut pid_labels = BTreeMap::new();

    // Allocate ranges that may need to be in the Platform MMIO region (MmioType::Platform).
    for (mut device, jail) in devices.into_iter() {
        let ranges = device
            .allocate_regions(resources)
            .map_err(DeviceRegistrationError::AllocateIoResource)?;

        let mut keep_rds = device.keep_rds();
        syslog::push_descriptors(&mut keep_rds);

        let irqs = device
            .get_platform_irqs()
            .map_err(DeviceRegistrationError::AllocateIrqResource)?;
        for irq in irqs.into_iter() {
            let irq_num = resources
                .allocate_irq()
                .ok_or(DeviceRegistrationError::AllocateIrq)?;

            if device.irq_is_automask(&irq) {
                let irq_evt =
                    devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
                irq_chip
                    .register_level_irq_event(irq_num, &irq_evt)
                    .map_err(DeviceRegistrationError::RegisterIrqfd)?;
                device
                    .assign_level_platform_irq(&irq_evt, irq.index)
                    .map_err(DeviceRegistrationError::SetupVfioPlatformIrq)?;
                keep_rds.extend(irq_evt.as_raw_descriptors());
            } else {
                let irq_evt =
                    devices::IrqEdgeEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
                irq_chip
                    .register_edge_irq_event(irq_num, &irq_evt)
                    .map_err(DeviceRegistrationError::RegisterIrqfd)?;
                device
                    .assign_edge_platform_irq(&irq_evt, irq.index)
                    .map_err(DeviceRegistrationError::SetupVfioPlatformIrq)?;
                keep_rds.extend(irq_evt.as_raw_descriptors());
            }
        }

        let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
            let proxy = ProxyDevice::new(device, &jail, keep_rds)
                .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
            pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
            Arc::new(Mutex::new(proxy))
        } else {
            device.on_sandboxed();
            Arc::new(Mutex::new(device))
        };
        for range in &ranges {
            mmio_bus
                .insert(arced_dev.clone(), range.0, range.1)
                .map_err(DeviceRegistrationError::MmioInsert)?;
        }
    }
    Ok(pid_labels)
}

/// Creates a root PCI device for use by this VM.
pub fn generate_pci_root(
    mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
    irq_chip: &mut dyn IrqChip,
    mmio_bus: Arc<Bus>,
    io_bus: Arc<Bus>,
    resources: &mut SystemAllocator,
    vm: &mut impl Vm,
    max_irqs: usize,
) -> Result<
    (
        PciRoot,
        Vec<(PciAddress, u32, PciInterruptPin)>,
        BTreeMap<u32, String>,
    ),
    DeviceRegistrationError,
> {
    let mut root = PciRoot::new(Arc::downgrade(&mmio_bus), Arc::downgrade(&io_bus));
    let mut pid_labels = BTreeMap::new();
    // Maps a device's index in `devices` to its assigned bus number, so the bus can be looked up
    // by device index.
    let mut devid_buses: BTreeMap<usize, u8> = BTreeMap::new();
    // Maps a bridge's secondary bus number to the BAR ranges of the devices behind that bridge.
    let mut bridge_bar_ranges: BTreeMap<u8, Vec<BarRange>> = BTreeMap::new();

    // Allocate PCI device address before allocating BARs.
    let mut device_addrs = Vec::<PciAddress>::new();
    for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
        let address = device
            .allocate_address(resources)
            .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
        device_addrs.push(address);

        if address.bus > 0 {
            devid_buses.insert(dev_idx, address.bus);
        }

        if PciBridge::is_pci_bridge(device) {
            let sec_bus = PciBridge::get_secondary_bus_num(device);
            bridge_bar_ranges.insert(sec_bus, Vec::<BarRange>::new());
        }
    }

    // Allocate ranges that may need to be in the low MMIO region (MmioType::Low).
    let mut io_ranges = BTreeMap::new();
    for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
        let mut ranges = device
            .allocate_io_bars(resources)
            .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
        io_ranges.insert(dev_idx, ranges.clone());

        if let Some(bus) = devid_buses.get(&dev_idx) {
            if let Some(bridge_bar) = bridge_bar_ranges.get_mut(bus) {
                bridge_bar.append(&mut ranges);
            }
        }
    }

    // Allocate device ranges that may be in low or high MMIO after low-only ranges.
    let mut device_ranges = BTreeMap::new();
    for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
        let mut ranges = device
            .allocate_device_bars(resources)
            .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
        device_ranges.insert(dev_idx, ranges.clone());

        if let Some(bus) = devid_buses.get(&dev_idx) {
            if let Some(bridge_bar) = bridge_bar_ranges.get_mut(bus) {
                bridge_bar.append(&mut ranges);
            }
        }
    }

    for (device, _jail) in devices.iter_mut() {
        if PciBridge::is_pci_bridge(device) {
            let sec_bus = PciBridge::get_secondary_bus_num(device);
            if let Some(bridge_bar) = bridge_bar_ranges.get(&sec_bus) {
                device
                    .configure_bridge_window(resources, bridge_bar)
                    .map_err(DeviceRegistrationError::ConfigureWindowSize)?;
            }
        }
    }

    // Allocate legacy INTx
    let mut pci_irqs = Vec::new();
    let mut irqs: Vec<Option<u32>> = vec![None; max_irqs];

    for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
        // For default interrupt routing use next preallocated interrupt from the pool.
        let irq_num = if let Some(irq) = irqs[dev_idx % max_irqs] {
            irq
        } else {
            let irq = resources
                .allocate_irq()
                .ok_or(DeviceRegistrationError::AllocateIrq)?;
            irqs[dev_idx % max_irqs] = Some(irq);
            irq
        };

        let intx_event =
            devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;

        if let Some((gsi, pin)) = device.assign_irq(&intx_event, Some(irq_num)) {
            // reserve INTx if needed and non-default.
            if gsi != irq_num {
                resources.reserve_irq(gsi);
            };
            irq_chip
                .register_level_irq_event(gsi, &intx_event)
                .map_err(DeviceRegistrationError::RegisterIrqfd)?;

            pci_irqs.push((device_addrs[dev_idx], gsi, pin));
        }
    }

    for (dev_idx, (mut device, jail)) in devices.into_iter().enumerate() {
        let address = device_addrs[dev_idx];

        let mut keep_rds = device.keep_rds();
        syslog::push_descriptors(&mut keep_rds);
        keep_rds.append(&mut vm.get_memory().as_raw_descriptors());

        let ranges = io_ranges.remove(&dev_idx).unwrap_or_default();
        let device_ranges = device_ranges.remove(&dev_idx).unwrap_or_default();
        device
            .register_device_capabilities()
            .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
        for (event, addr, datamatch) in device.ioevents() {
            let io_addr = IoEventAddress::Mmio(addr);
            vm.register_ioevent(event, io_addr, datamatch)
                .map_err(DeviceRegistrationError::RegisterIoevent)?;
            keep_rds.push(event.as_raw_descriptor());
        }

        let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
            let proxy = ProxyDevice::new(device, &jail, keep_rds)
                .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
            pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
            Arc::new(Mutex::new(proxy))
        } else {
            device.on_sandboxed();
            Arc::new(Mutex::new(device))
        };
        root.add_device(address, arced_dev.clone());
        for range in &ranges {
            mmio_bus
                .insert(arced_dev.clone(), range.addr, range.size)
                .map_err(DeviceRegistrationError::MmioInsert)?;
        }

        for range in &device_ranges {
            mmio_bus
                .insert(arced_dev.clone(), range.addr, range.size)
                .map_err(DeviceRegistrationError::MmioInsert)?;
        }
    }
    Ok((root, pci_irqs, pid_labels))
}

/// Adds a goldfish battery device.
///
/// Appends the device's AML data to `amls`, wires the device to `irq_num`, and returns the
/// control tube used to communicate with it.
///
/// # Arguments
///
/// * `amls` - the vector to put the goldfish battery AML
/// * `battery_jail` - used when sandbox is enabled
/// * `mmio_bus` - bus to add the devices to
/// * `irq_chip` - the IrqChip object for registering irq events
/// * `irq_num` - assigned interrupt to use
/// * `resources` - the SystemAllocator to allocate IO and MMIO for ACPI
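///
/// A minimal usage sketch (illustrative only; `mmio_bus`, `irq_chip`, `irq_num`, and `resources`
/// are hypothetical values owned by the caller):
///
/// ```ignore
/// let mut amls = Vec::new();
/// let bat_control_tube = add_goldfish_battery(
///     &mut amls,
///     /* battery_jail= */ None,
///     &mmio_bus,
///     irq_chip,
///     irq_num,
///     &mut resources,
/// )?;
/// ```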
pub fn add_goldfish_battery(
    amls: &mut Vec<u8>,
    battery_jail: Option<Minijail>,
    mmio_bus: &Bus,
    irq_chip: &mut dyn IrqChip,
    irq_num: u32,
    resources: &mut SystemAllocator,
) -> Result<Tube, DeviceRegistrationError> {
    let alloc = resources.get_anon_alloc();
    let mmio_base = resources
        .mmio_allocator(MmioType::Low)
        .allocate_with_align(
            devices::bat::GOLDFISHBAT_MMIO_LEN,
            alloc,
            "GoldfishBattery".to_string(),
            devices::bat::GOLDFISHBAT_MMIO_LEN,
        )
        .map_err(DeviceRegistrationError::AllocateIoResource)?;

    let irq_evt = devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;

    irq_chip
        .register_level_irq_event(irq_num, &irq_evt)
        .map_err(DeviceRegistrationError::RegisterIrqfd)?;

    let (control_tube, response_tube) =
        Tube::pair().map_err(DeviceRegistrationError::CreateTube)?;

    #[cfg(feature = "power-monitor-powerd")]
    let create_monitor = Some(Box::new(power_monitor::powerd::DBusMonitor::connect)
        as Box<dyn power_monitor::CreatePowerMonitorFn>);

    #[cfg(not(feature = "power-monitor-powerd"))]
    let create_monitor = None;

    let goldfish_bat =
        devices::GoldfishBattery::new(mmio_base, irq_num, irq_evt, response_tube, create_monitor)
            .map_err(DeviceRegistrationError::RegisterBattery)?;
    goldfish_bat.to_aml_bytes(amls);

    match battery_jail.as_ref() {
        Some(jail) => {
            let mut keep_rds = goldfish_bat.keep_rds();
            syslog::push_descriptors(&mut keep_rds);
            mmio_bus
                .insert(
                    Arc::new(Mutex::new(
                        ProxyDevice::new(goldfish_bat, jail, keep_rds)
                            .map_err(DeviceRegistrationError::ProxyDeviceCreation)?,
                    )),
                    mmio_base,
                    devices::bat::GOLDFISHBAT_MMIO_LEN,
                )
                .map_err(DeviceRegistrationError::MmioInsert)?;
        }
        None => {
            mmio_bus
                .insert(
                    Arc::new(Mutex::new(goldfish_bat)),
                    mmio_base,
                    devices::bat::GOLDFISHBAT_MMIO_LEN,
                )
                .map_err(DeviceRegistrationError::MmioInsert)?;
        }
    }

    Ok(control_tube)
}

/// Errors for image loading.
#[sorted]
#[derive(Error, Debug)]
pub enum LoadImageError {
    #[error("Alignment not a power of two: {0}")]
    BadAlignment(u64),
    #[error("Image size too large: {0}")]
    ImageSizeTooLarge(u64),
    #[error("Reading image into memory failed: {0}")]
    ReadToMemory(GuestMemoryError),
    #[error("Seek failed: {0}")]
    Seek(io::Error),
}

/// Load an image from a file into guest memory.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `guest_addr` - The starting address to load the image in the guest memory.
/// * `max_size` - The amount of space in bytes available in the guest memory for the image.
/// * `image` - The file containing the image to be loaded.
///
/// The size in bytes of the loaded image is returned.
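///
/// A minimal usage sketch (illustrative only; the kernel path and load address are hypothetical):
///
/// ```ignore
/// let mut kernel_image = File::open("/path/to/vmlinux")?;
/// let loaded_size = load_image(
///     &guest_mem,
///     &mut kernel_image,
///     GuestAddress(0x20_0000),
///     64 * 1024 * 1024, // Maximum space reserved for the kernel image.
/// )?;
/// ```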
pub fn load_image<F>(
    guest_mem: &GuestMemory,
    image: &mut F,
    guest_addr: GuestAddress,
    max_size: u64,
) -> Result<usize, LoadImageError>
where
    F: Read + Seek + AsRawDescriptor,
{
    let size = image.seek(SeekFrom::End(0)).map_err(LoadImageError::Seek)?;

    if size > usize::max_value() as u64 || size > max_size {
        return Err(LoadImageError::ImageSizeTooLarge(size));
    }

    // This is safe due to the bounds check above.
    let size = size as usize;

    image
        .seek(SeekFrom::Start(0))
        .map_err(LoadImageError::Seek)?;

    guest_mem
        .read_to_memory(guest_addr, image, size)
        .map_err(LoadImageError::ReadToMemory)?;

    Ok(size)
}

/// Load an image from a file into guest memory at the highest possible address.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `image` - The file containing the image to be loaded.
/// * `min_guest_addr` - The minimum address of the start of the image.
/// * `max_guest_addr` - The maximum address of the end of the image.
/// * `align` - The minimum alignment of the start address of the image in bytes
///   (must be a power of two).
///
/// The guest address and size in bytes of the loaded image are returned.
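///
/// A minimal usage sketch (illustrative only; the initrd path and address range are
/// hypothetical):
///
/// ```ignore
/// let mut initrd_image = File::open("/path/to/initrd.img")?;
/// let (initrd_addr, initrd_size) = load_image_high(
///     &guest_mem,
///     &mut initrd_image,
///     GuestAddress(0x100_0000),  // Lowest address the image may start at.
///     GuestAddress(0x8000_0000), // Highest address the image may end at.
///     0x1000,                    // Page-align the load address.
/// )?;
/// ```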
pub fn load_image_high<F>(
    guest_mem: &GuestMemory,
    image: &mut F,
    min_guest_addr: GuestAddress,
    max_guest_addr: GuestAddress,
    align: u64,
) -> Result<(GuestAddress, usize), LoadImageError>
where
    F: Read + Seek + AsRawDescriptor,
{
    if !align.is_power_of_two() {
        return Err(LoadImageError::BadAlignment(align));
    }

    let max_size = max_guest_addr.offset_from(min_guest_addr) & !(align - 1);
    let size = image.seek(SeekFrom::End(0)).map_err(LoadImageError::Seek)?;

    if size > usize::max_value() as u64 || size > max_size {
        return Err(LoadImageError::ImageSizeTooLarge(size));
    }

    image
        .seek(SeekFrom::Start(0))
        .map_err(LoadImageError::Seek)?;

    // Load image at the maximum aligned address allowed.
    // The subtraction cannot underflow because of the size checks above.
    let guest_addr = GuestAddress((max_guest_addr.offset() - size) & !(align - 1));

    // This is safe due to the bounds check above.
    let size = size as usize;

    guest_mem
        .read_to_memory(guest_addr, image, size)
        .map_err(LoadImageError::ReadToMemory)?;

    Ok((guest_addr, size))
}