// Copyright 2018 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::collections::BTreeMap;
use std::sync::Arc;

#[cfg(target_arch = "x86_64")]
use acpi_tables::sdt::SDT;
use anyhow::anyhow;
use anyhow::Context;
use base::debug;
use base::error;
use base::trace;
use base::AsRawDescriptor;
use base::AsRawDescriptors;
use base::Event;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SharedMemory;
use base::Tube;
use base::WorkerThread;
use data_model::Le32;
use hypervisor::Datamatch;
use hypervisor::MemCacheType;
use libc::ERANGE;
#[cfg(target_arch = "x86_64")]
use metrics::MetricEventType;
use resources::AddressRange;
use resources::Alloc;
use resources::AllocOptions;
use resources::SystemAllocator;
use serde::Deserialize;
use serde::Serialize;
use snapshot::AnySnapshot;
use sync::Mutex;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER_OK;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FAILED;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FEATURES_OK;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_NEEDS_RESET;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_SUSPEND;
use vm_control::api::VmMemoryClient;
use vm_control::VmMemoryDestination;
use vm_control::VmMemoryRegionId;
use vm_control::VmMemorySource;
use vm_memory::GuestMemory;
use zerocopy::FromBytes;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

use self::virtio_pci_common_config::VirtioPciCommonConfig;
use super::*;
#[cfg(target_arch = "x86_64")]
use crate::acpi::PmWakeupEvent;
#[cfg(target_arch = "x86_64")]
use crate::pci::pm::PciDevicePower;
use crate::pci::pm::PciPmCap;
use crate::pci::pm::PmConfig;
use crate::pci::pm::PmStatusChange;
use crate::pci::BarRange;
use crate::pci::MsixCap;
use crate::pci::MsixConfig;
use crate::pci::MsixStatus;
use crate::pci::PciAddress;
use crate::pci::PciBarConfiguration;
use crate::pci::PciBarIndex;
use crate::pci::PciBarPrefetchable;
use crate::pci::PciBarRegionType;
use crate::pci::PciBaseSystemPeripheralSubclass;
use crate::pci::PciCapability;
use crate::pci::PciCapabilityID;
use crate::pci::PciClassCode;
use crate::pci::PciConfiguration;
use crate::pci::PciDevice;
use crate::pci::PciDeviceError;
use crate::pci::PciDisplaySubclass;
use crate::pci::PciHeaderType;
use crate::pci::PciId;
use crate::pci::PciInputDeviceSubclass;
use crate::pci::PciInterruptPin;
use crate::pci::PciMassStorageSubclass;
use crate::pci::PciMultimediaSubclass;
use crate::pci::PciNetworkControllerSubclass;
use crate::pci::PciSimpleCommunicationControllerSubclass;
use crate::pci::PciSubclass;
use crate::pci::PciWirelessControllerSubclass;
use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
#[cfg(feature = "pci-hotplug")]
use crate::HotPluggable;
use crate::IrqLevelEvent;
use crate::Suspendable;

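/// Virtio PCI capability types, as encoded in the `cfg_type` field of a
/// vendor-specific capability. Values 1-5 match the virtio 1.x spec's
/// `VIRTIO_PCI_CAP_*_CFG` constants.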
#[repr(u8)]
#[derive(Debug, Copy, Clone, enumn::N)]
pub enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    // Doorbell, Notification, and SharedMemory are vhost-user-related PCI
    // capabilities, specified in section 5.7.7.4 of
    // https://stefanha.github.io/virtio/vhost-user-slave.html#x1-2830007.
    DoorbellConfig = 6,
    NotificationConfig = 7,
    SharedMemoryConfig = 8,
}

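/// Vendor-specific PCI capability header shared by all virtio configuration
/// structures (`struct virtio_pci_cap` in the virtio 1.x spec).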
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciCap {
    // cap_vndr and cap_next are autofilled based on id() in pci configuration
    pub cap_vndr: u8,  // Generic PCI field: PCI_CAP_ID_VNDR
    pub cap_next: u8,  // Generic PCI field: next ptr
    pub cap_len: u8,   // Generic PCI field: capability length
    pub cfg_type: u8,  // Identifies the structure.
    pub bar: u8,       // Where to find it.
    id: u8,            // Multiple capabilities of the same type
    padding: [u8; 2],  // Pad to full dword.
    pub offset: Le32,  // Offset within bar.
    pub length: Le32,  // Length of the structure, in bytes.
}

impl PciCapability for VirtioPciCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 4]
    }
}

impl VirtioPciCap {
    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self {
        VirtioPciCap {
            cap_vndr: 0,
            cap_next: 0,
            cap_len: std::mem::size_of::<VirtioPciCap>() as u8,
            cfg_type: cfg_type as u8,
            bar,
            id: 0,
            padding: [0; 2],
            offset: Le32::from(offset),
            length: Le32::from(length),
        }
    }

    pub fn set_cap_len(&mut self, cap_len: u8) {
        self.cap_len = cap_len;
    }
}

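/// Notify capability: extends `VirtioPciCap` with the multiplier used to
/// derive each queue's doorbell offset (`struct virtio_pci_notify_cap` in the
/// virtio 1.x spec).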
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    notify_off_multiplier: Le32,
}

impl PciCapability for VirtioPciNotifyCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 5]
    }
}

impl VirtioPciNotifyCap {
    pub fn new(
        cfg_type: PciCapabilityType,
        bar: u8,
        offset: u32,
        length: u32,
        multiplier: Le32,
    ) -> Self {
        VirtioPciNotifyCap {
            cap: VirtioPciCap {
                cap_vndr: 0,
                cap_next: 0,
                cap_len: std::mem::size_of::<VirtioPciNotifyCap>() as u8,
                cfg_type: cfg_type as u8,
                bar,
                id: 0,
                padding: [0; 2],
                offset: Le32::from(offset),
                length: Le32::from(length),
            },
            notify_off_multiplier: multiplier,
        }
    }
}

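/// Shared memory capability: extends `VirtioPciCap` with the high 32 bits of
/// a 64-bit offset and length (`struct virtio_pci_cap64` in the virtio 1.x
/// spec).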
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciShmCap {
    cap: VirtioPciCap,
    offset_hi: Le32, // Most significant 32 bits of offset
    length_hi: Le32, // Most significant 32 bits of length
}

impl PciCapability for VirtioPciShmCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 6]
    }
}

impl VirtioPciShmCap {
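    /// Builds a shared-memory capability, splitting the 64-bit `offset` and
    /// `length` into low/high dword halves: e.g. `offset = 0x1_2345_6789`
    /// becomes `cap.offset = 0x2345_6789` and `offset_hi = 0x1`.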
    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self {
        VirtioPciShmCap {
            cap: VirtioPciCap {
                cap_vndr: 0,
                cap_next: 0,
                cap_len: std::mem::size_of::<VirtioPciShmCap>() as u8,
                cfg_type: cfg_type as u8,
                bar,
                id: shmid,
                padding: [0; 2],
                offset: Le32::from(offset as u32),
                length: Le32::from(length as u32),
            },
            offset_hi: Le32::from((offset >> 32) as u32),
            length_hi: Le32::from((length >> 32) as u32),
        }
    }
}

// Allocate one BAR for the structures pointed to by the capability structures.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const COMMON_CONFIG_LAST: u64 = COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE - 1;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x1000;
const ISR_CONFIG_SIZE: u64 = 1;
const ISR_CONFIG_LAST: u64 = ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE - 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const DEVICE_CONFIG_LAST: u64 = DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE - 1;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const NOTIFICATION_LAST: u64 = NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE - 1;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_TABLE_LAST: u64 = MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE - 1;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const MSIX_PBA_LAST: u64 = MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE - 1;
const CAPABILITY_BAR_SIZE: u64 = 0x8000;
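// Resulting layout of the 0x8000-byte settings BAR:
//   0x0000..=0x0037  common config
//   0x1000           ISR status (1 byte)
//   0x2000..=0x2fff  device-specific config
//   0x3000..=0x3fff  queue notification (doorbells)
//   0x6000..=0x6fff  MSI-X table
//   0x7000..=0x7fff  MSI-X PBA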

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.
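// Queue N's doorbell therefore lives at
// NOTIFICATION_BAR_OFFSET + N * NOTIFY_OFF_MULTIPLIER within the settings BAR;
// e.g. queue 3 is notified by writing to BAR offset 0x3000 + 12.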

const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
const VIRTIO_PCI_REVISION_ID: u8 = 1;

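// BAR 0 holds the settings/capability structures laid out above; a device's
// shared memory region, if any, is exposed as a 64-bit prefetchable BAR at
// index 2 (a 64-bit BAR also consumes the following BAR slot, here BAR 3).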
const CAPABILITIES_BAR_NUM: usize = 0;
const SHMEM_BAR_NUM: usize = 2;

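/// A queue notification (doorbell) event, plus whether it is currently
/// registered as an ioevent with the hypervisor.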
struct QueueEvent {
    event: Event,
    ioevent_registered: bool,
}

/// Implements the
/// [PCI](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-650001)
/// transport for virtio devices.
pub struct VirtioPciDevice {
    config_regs: PciConfiguration,
    preferred_address: Option<PciAddress>,
    pci_address: Option<PciAddress>,

    device: Box<dyn VirtioDevice>,
    device_activated: bool,
    disable_intx: bool,

    interrupt: Option<Interrupt>,
    interrupt_evt: Option<IrqLevelEvent>,
    interrupt_resample_worker: Option<WorkerThread<()>>,

    queues: Vec<QueueConfig>,
    queue_evts: Vec<QueueEvent>,
    mem: GuestMemory,
    settings_bar: PciBarIndex,
    msix_config: Arc<Mutex<MsixConfig>>,
    pm_config: Arc<Mutex<PmConfig>>,
    common_config: VirtioPciCommonConfig,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // API client that is present if the device has shared memory regions, and
    // is used to map/unmap files into the shared memory region.
    shared_memory_vm_memory_client: Option<VmMemoryClient>,

    // API client for registration of ioevents when PCI BAR reprogramming is detected.
    ioevent_vm_memory_client: VmMemoryClient,

    // State only present while asleep.
    sleep_state: Option<SleepState>,

    vm_control_tube: Arc<Mutex<Tube>>,
}

enum SleepState {
    // Asleep and device hasn't been activated yet by the guest.
    Inactive,
    // Asleep and device has been activated by the guest.
    Active {
        /// The queues returned from `VirtioDevice::virtio_sleep`.
        /// Map is from queue index -> Queue.
        activated_queues: BTreeMap<usize, Queue>,
    },
}

#[derive(Serialize, Deserialize)]
struct VirtioPciDeviceSnapshot {
    config_regs: AnySnapshot,

    inner_device: AnySnapshot,
    device_activated: bool,

    interrupt: Option<InterruptSnapshot>,
    msix_config: AnySnapshot,
    common_config: VirtioPciCommonConfig,

    queues: Vec<AnySnapshot>,
    activated_queues: Option<Vec<(usize, AnySnapshot)>>,
}

impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    pub fn new(
        mem: GuestMemory,
        device: Box<dyn VirtioDevice>,
        msi_device_tube: Tube,
        disable_intx: bool,
        shared_memory_vm_memory_client: Option<VmMemoryClient>,
        ioevent_vm_memory_client: VmMemoryClient,
        vm_control_tube: Tube,
    ) -> Result<Self> {
        // shared_memory_vm_memory_client is required if there are shared memory regions.
        assert_eq!(
            device.get_shared_memory_region().is_none(),
            shared_memory_vm_memory_client.is_none()
        );

        let mut queue_evts = Vec::new();
        for _ in device.queue_max_sizes() {
            queue_evts.push(QueueEvent {
                event: Event::new()?,
                ioevent_registered: false,
            });
        }
        let queues = device
            .queue_max_sizes()
            .iter()
            .map(|&s| QueueConfig::new(s, device.features()))
            .collect();

        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

        let (pci_device_class, pci_device_subclass) = match device.device_type() {
            DeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Console => (
                PciClassCode::SimpleCommunicationController,
                &PciSimpleCommunicationControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Rng => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Balloon => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Scsi => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Scsi as &dyn PciSubclass,
            ),
            DeviceType::P9 => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Gpu => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Input => (
                PciClassCode::InputDevice,
                &PciInputDeviceSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Vsock => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Iommu => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Iommu as &dyn PciSubclass,
            ),
            DeviceType::Sound => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::AudioController as &dyn PciSubclass,
            ),
            DeviceType::Fs => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pmem => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::NonVolatileMemory as &dyn PciSubclass,
            ),
            DeviceType::Mac80211HwSim => (
                PciClassCode::WirelessController,
                &PciWirelessControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::VideoEncoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::VideoDecoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Media => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Scmi => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Wl => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Tpm => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pvclock => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
        };

        let num_interrupts = device.num_interrupts();

        // One MSI-X vector per queue plus one for configuration changes.
        let msix_num = u16::try_from(num_interrupts + 1).map_err(|_| base::Error::new(ERANGE))?;
        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_num,
            msi_device_tube,
            PciId::new(VIRTIO_PCI_VENDOR_ID, pci_device_id).into(),
            device.debug_label(),
        )));

        let config_regs = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            pci_device_class,
            pci_device_subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            VIRTIO_PCI_REVISION_ID,
        );

        Ok(VirtioPciDevice {
            config_regs,
            preferred_address: device.pci_address(),
            pci_address: None,
            device,
            device_activated: false,
            disable_intx,
            interrupt: None,
            interrupt_evt: None,
            interrupt_resample_worker: None,
            queues,
            queue_evts,
            mem,
            settings_bar: 0,
            msix_config,
            pm_config: Arc::new(Mutex::new(PmConfig::new(true))),
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: VIRTIO_MSI_NO_VECTOR,
            },
            iommu: None,
            shared_memory_vm_memory_client,
            ioevent_vm_memory_client,
            sleep_state: None,
            vm_control_tube: Arc::new(Mutex::new(vm_control_tube)),
        })
    }

    fn is_driver_ready(&self) -> bool {
        let ready_bits = (VIRTIO_CONFIG_S_ACKNOWLEDGE
            | VIRTIO_CONFIG_S_DRIVER
            | VIRTIO_CONFIG_S_DRIVER_OK
            | VIRTIO_CONFIG_S_FEATURES_OK) as u8;
        (self.common_config.driver_status & ready_bits) == ready_bits
            && self.common_config.driver_status & VIRTIO_CONFIG_S_FAILED as u8 == 0
    }

    fn is_device_suspended(&self) -> bool {
        (self.common_config.driver_status & VIRTIO_CONFIG_S_SUSPEND as u8) != 0
    }

    /// Determines if the driver has requested that the device reset itself.
    fn is_reset_requested(&self) -> bool {
        self.common_config.driver_status == DEVICE_RESET as u8
    }

    fn add_settings_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&common_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&isr_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&device_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.config_regs
            .add_capability(&notify_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - How will the configuration_cap work?
        let configuration_cap = VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0);
        self.config_regs
            .add_capability(&configuration_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let msix_cap = MsixCap::new(
            settings_bar,
            self.msix_config.lock().num_vectors(),
            MSIX_TABLE_BAR_OFFSET as u32,
            settings_bar,
            MSIX_PBA_BAR_OFFSET as u32,
        );
        self.config_regs
            .add_capability(&msix_cap, Some(Box::new(self.msix_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.config_regs
            .add_capability(&PciPmCap::new(), Some(Box::new(self.pm_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.settings_bar = settings_bar as PciBarIndex;
        Ok(())
    }

    /// Activates the underlying `VirtioDevice`. `assign_irq` has to be called first.
    fn activate(&mut self) -> anyhow::Result<()> {
        let interrupt = Interrupt::new(
            self.interrupt_evt
                .as_ref()
                .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                .try_clone()
                .with_context(|| format!("{} failed to clone interrupt_evt", self.debug_label()))?,
            Some(self.msix_config.clone()),
            self.common_config.msix_config,
            #[cfg(target_arch = "x86_64")]
            Some((
                PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                MetricEventType::VirtioWakeup {
                    virtio_id: self.device.device_type() as u32,
                },
            )),
        );
        self.interrupt = Some(interrupt.clone());
        self.interrupt_resample_worker = interrupt.spawn_resample_thread();

        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        // Use ready queues and their events.
        let queues = self
            .queues
            .iter_mut()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .map(|((queue_index, queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                let queue_evt = evt.event.try_clone().context("failed to clone queue_evt")?;
                Ok((
                    queue_index,
                    queue
                        .activate(&self.mem, queue_evt, interrupt.clone())
                        .context("failed to activate queue")?,
                ))
            })
            .collect::<anyhow::Result<BTreeMap<usize, Queue>>>()?;

        if let Err(e) = self.device.activate(self.mem.clone(), interrupt, queues) {
            error!("{} activate failed: {:#}", self.debug_label(), e);
            self.common_config.driver_status |= VIRTIO_CONFIG_S_NEEDS_RESET as u8;
        } else {
            self.device_activated = true;
        }

        Ok(())
    }

    fn unregister_ioevents(&mut self) -> anyhow::Result<()> {
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        for (queue_index, evt) in self.queue_evts.iter_mut().enumerate() {
            if evt.ioevent_registered {
                self.ioevent_vm_memory_client
                    .unregister_io_event(
                        evt.event.try_clone().context("failed to clone Event")?,
                        notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                        Datamatch::AnyLength,
                    )
                    .context("failed to unregister ioevent")?;
                evt.ioevent_registered = false;
            }
        }
        Ok(())
    }

    pub fn virtio_device(&self) -> &dyn VirtioDevice {
        self.device.as_ref()
    }

    pub fn pci_address(&self) -> Option<PciAddress> {
        self.pci_address
    }

    #[cfg(target_arch = "x86_64")]
    fn handle_pm_status_change(&mut self, status: &PmStatusChange) {
        if let Some(interrupt) = self.interrupt.as_mut() {
            interrupt.set_wakeup_event_active(status.to == PciDevicePower::D3)
        }
    }

    #[cfg(not(target_arch = "x86_64"))]
    fn handle_pm_status_change(&mut self, _status: &PmStatusChange) {}
}

impl PciDevice for VirtioPciDevice {
    fn debug_label(&self) -> String {
        format!("pci{}", self.device.debug_label())
    }

    fn preferred_address(&self) -> Option<PciAddress> {
        self.preferred_address
    }

    fn allocate_address(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<PciAddress, PciDeviceError> {
        if self.pci_address.is_none() {
            if let Some(address) = self.preferred_address {
                if !resources.reserve_pci(address, self.debug_label()) {
                    return Err(PciDeviceError::PciAllocationFailed);
                }
                self.pci_address = Some(address);
            } else {
                self.pci_address = resources.allocate_pci(0, self.debug_label());
            }
        }
        self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
    }

    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut rds = self.device.keep_rds();
        rds.extend(
            self.queue_evts
                .iter()
                .map(|qe| qe.event.as_raw_descriptor()),
        );
        if let Some(interrupt_evt) = &self.interrupt_evt {
            rds.extend(interrupt_evt.as_raw_descriptors());
        }
        let descriptor = self.msix_config.lock().get_msi_socket();
        rds.push(descriptor);
        if let Some(iommu) = &self.iommu {
            rds.append(&mut iommu.lock().as_raw_descriptors());
        }
        rds.push(self.ioevent_vm_memory_client.as_raw_descriptor());
        rds.push(self.vm_control_tube.lock().as_raw_descriptor());
        rds
    }

    fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
        self.interrupt_evt = Some(irq_evt);
        if !self.disable_intx {
            self.config_regs.set_irq(irq_num as u8, pin);
        }
    }

    fn allocate_io_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_io_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-cap_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }

    fn allocate_device_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_device_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-custom_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }

    fn destroy_device(&mut self) {
        if let Err(e) = self.unregister_ioevents() {
            error!("error destroying {}: {:?}", &self.debug_label(), &e);
        }
    }

    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
        self.config_regs.get_bar_configuration(bar_num)
    }

    fn register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut caps = self.device.get_device_caps();
        if let Some(region) = self.device.get_shared_memory_region() {
            caps.push(Box::new(VirtioPciShmCap::new(
                PciCapabilityType::SharedMemoryConfig,
                SHMEM_BAR_NUM as u8,
                0,
                region.length,
                region.id,
            )));
        }

        for cap in caps {
            self.config_regs
                .add_capability(&*cap, None)
                .map_err(PciDeviceError::CapabilitiesSetup)?;
        }

        Ok(())
    }

    fn read_config_register(&self, reg_idx: usize) -> u32 {
        self.config_regs.read_reg(reg_idx)
    }

    fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
        if let Some(res) = self.config_regs.write_reg(reg_idx, offset, data) {
            if let Some(msix_behavior) = res.downcast_ref::<MsixStatus>() {
                self.device.control_notify(*msix_behavior);
            } else if let Some(status) = res.downcast_ref::<PmStatusChange>() {
                self.handle_pm_status_change(status);
            }
        }
    }

    fn setup_pci_config_mapping(
        &mut self,
        shmem: &SharedMemory,
        base: usize,
        len: usize,
    ) -> std::result::Result<bool, PciDeviceError> {
        self.config_regs
            .setup_mapping(shmem, base, len)
            .map(|_| true)
            .map_err(PciDeviceError::MmioSetup)
    }

    fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.read(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    if let Some(v) = data.get_mut(0) {
                        // Reading this register resets it to 0.
                        *v = if let Some(interrupt) = &self.interrupt {
                            interrupt.read_and_reset_interrupt_status()
                        } else {
                            0
                        };
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .read_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Handled with ioevents.
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    self.msix_config
                        .lock()
                        .read_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .read_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        }
    }

    fn write_bar(&mut self, bar_index: usize, offset: u64, data: &[u8]) {
        let was_suspended = self.is_device_suspended();

        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.write(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    if let Some(v) = data.first() {
                        if let Some(interrupt) = &self.interrupt {
                            interrupt.clear_interrupt_status_bits(*v);
                        }
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .write_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Notifications are normally handled with ioevents inside the hypervisor and
                    // do not reach write_bar(). However, if the ioevent registration hasn't
                    // finished yet, it is possible for a write to the notification region to make
                    // it through as a normal MMIO exit and end up here. To handle that case,
                    // provide a fallback that looks up the corresponding queue for the offset and
                    // triggers its event, which is equivalent to what the ioevent would do.
                    let queue_index = (offset - NOTIFICATION_BAR_OFFSET) as usize
                        / NOTIFY_OFF_MULTIPLIER as usize;
                    trace!("write_bar notification fallback for queue {}", queue_index);
                    if let Some(evt) = self.queue_evts.get(queue_index) {
                        let _ = evt.event.signal();
                    }
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    let behavior = self
                        .msix_config
                        .lock()
                        .write_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                    self.device.control_notify(behavior);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .write_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        }

        if !self.device_activated && self.is_driver_ready() {
            if let Err(e) = self.activate() {
                error!("failed to activate device: {:#}", e);
            }
        }

        let is_suspended = self.is_device_suspended();
        if is_suspended != was_suspended {
            if let Some(interrupt) = self.interrupt.as_mut() {
                interrupt.set_suspended(is_suspended);
            }
        }

        // Device has been reset by the driver.
        if self.device_activated && self.is_reset_requested() {
            if let Err(e) = self.device.reset() {
                error!("failed to reset {} device: {:#}", self.debug_label(), e);
            } else {
                self.device_activated = false;
                // Reset queues.
                self.queues.iter_mut().for_each(QueueConfig::reset);
                // Select queue 0 by default.
                self.common_config.queue_select = 0;
                if let Err(e) = self.unregister_ioevents() {
                    error!("failed to unregister ioevents: {:#}", e);
                }
                if let Some(interrupt_resample_worker) = self.interrupt_resample_worker.take() {
                    interrupt_resample_worker.stop();
                }
            }
        }
    }

    fn on_device_sandboxed(&mut self) {
        self.device.on_device_sandboxed();
    }

    #[cfg(target_arch = "x86_64")]
    fn generate_acpi(&mut self, sdts: &mut Vec<SDT>) {
        self.device.generate_acpi(
            self.pci_address.expect("pci_address must be assigned"),
            sdts,
        )
    }

    fn as_virtio_pci_device(&self) -> Option<&VirtioPciDevice> {
        Some(self)
    }
}

fn allocate_io_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_io_bars");
    // Allocate one BAR for the structures pointed to by the capability structures.
    let settings_config_addr = alloc_fn(
        CAPABILITY_BAR_SIZE,
        Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: 0,
        },
        AllocOptions::new()
            .max_address(u32::MAX.into())
            .align(CAPABILITY_BAR_SIZE),
    )?;
    let config = PciBarConfiguration::new(
        CAPABILITIES_BAR_NUM,
        CAPABILITY_BAR_SIZE,
        PciBarRegionType::Memory32BitRegion,
        PciBarPrefetchable::NotPrefetchable,
    )
    .set_address(settings_config_addr);
    let settings_bar = virtio_pci_device
        .config_regs
        .add_pci_bar(config)
        .map_err(|e| PciDeviceError::IoRegistrationFailed(settings_config_addr, e))?
        as u8;
    // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
    virtio_pci_device.add_settings_pci_capabilities(settings_bar)?;

    Ok(vec![BarRange {
        addr: settings_config_addr,
        size: CAPABILITY_BAR_SIZE,
        prefetchable: false,
    }])
}

fn allocate_device_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_device_bars");

    let configs = virtio_pci_device.device.get_device_bars(address);
    let configs = if !configs.is_empty() {
        configs
    } else {
        let region = match virtio_pci_device.device.get_shared_memory_region() {
            None => return Ok(Vec::new()),
            Some(r) => r,
        };
        let config = PciBarConfiguration::new(
            SHMEM_BAR_NUM,
            region
                .length
                .checked_next_power_of_two()
                .expect("bar too large"),
            PciBarRegionType::Memory64BitRegion,
            PciBarPrefetchable::Prefetchable,
        );

        let alloc = Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: config.bar_index() as u8,
        };

        let vm_memory_client = virtio_pci_device
            .shared_memory_vm_memory_client
            .take()
            .expect("missing shared_memory_tube");

        // See the comment on VmMemoryRequest::execute.
        let can_prepare = !virtio_pci_device
            .device
            .expose_shmem_descriptors_with_viommu();
        let prepare_type = if can_prepare {
            virtio_pci_device.device.get_shared_memory_prepare_type()
        } else {
            SharedMemoryPrepareType::DynamicPerMapping
        };

        let vm_requester = Box::new(VmRequester::new(vm_memory_client, alloc, prepare_type));
        virtio_pci_device
            .device
            .set_shared_memory_mapper(vm_requester);

        vec![config]
    };
    let mut ranges = vec![];
    for config in configs {
        let device_addr = alloc_fn(
            config.size(),
            Alloc::PciBar {
                bus: address.bus,
                dev: address.dev,
                func: address.func,
                bar: config.bar_index() as u8,
            },
            AllocOptions::new()
                .prefetchable(config.is_prefetchable())
                .align(config.size()),
        )?;
        let config = config.set_address(device_addr);
        let _device_bar = virtio_pci_device
            .config_regs
            .add_pci_bar(config)
            .map_err(|e| PciDeviceError::IoRegistrationFailed(device_addr, e))?;
        ranges.push(BarRange {
            addr: device_addr,
            size: config.size(),
            prefetchable: false,
        });
    }

    if virtio_pci_device
        .device
        .get_shared_memory_region()
        .is_some()
    {
        let shmem_region = AddressRange::from_start_and_size(ranges[0].addr, ranges[0].size)
            .expect("invalid shmem region");
        virtio_pci_device
            .device
            .set_shared_memory_region(shmem_region);
    }

    Ok(ranges)
}

#[cfg(feature = "pci-hotplug")]
impl HotPluggable for VirtioPciDevice {
    /// Sets the device's PciAddress to pci_addr.
    fn set_pci_address(&mut self, pci_addr: PciAddress) -> std::result::Result<(), PciDeviceError> {
        self.pci_address = Some(pci_addr);
        Ok(())
    }

    /// Configures the IO BAR layout without allocating memory.
    fn configure_io_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut simple_allocator = SimpleAllocator::new(0);
        allocate_io_bars(self, |size, _, _| simple_allocator.alloc(size, size)).map(|_| ())
    }

    /// Configures the device BAR layout without allocating memory.
    fn configure_device_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // For device BARs, the space for CAPABILITY_BAR_SIZE should be skipped.
        let mut simple_allocator = SimpleAllocator::new(CAPABILITY_BAR_SIZE);
        allocate_device_bars(self, |size, _, _| simple_allocator.alloc(size, size)).map(|_| ())
    }
}

#[cfg(feature = "pci-hotplug")]
/// A simple allocator that can allocate non-overlapping aligned intervals.
///
/// The addresses allocated are not exclusively reserved for the device, and cannot be used for a
/// static device. The allocated placeholder address describes the layout of PCI BARs for
/// hotplugged devices. Actual memory allocation is handled by PCI BAR reprogramming initiated by
/// the guest OS.
struct SimpleAllocator {
    current_address: u64,
}

#[cfg(feature = "pci-hotplug")]
impl SimpleAllocator {
    /// Constructs a `SimpleAllocator` whose addresses start at or after `base_address`.
    fn new(base_address: u64) -> Self {
        Self {
            current_address: base_address,
        }
    }

    /// Allocates an interval with the given size and alignment. Returns the start address of the
    /// allocated range.
    fn alloc(&mut self, size: u64, align: u64) -> std::result::Result<u64, PciDeviceError> {
        if align > 0 {
            // Align current_address upward to the next multiple of `align`.
            self.current_address = self.current_address.next_multiple_of(align);
        }
        let start_address = self.current_address;
        self.current_address += size;
        Ok(start_address)
    }
}

impl Suspendable for VirtioPciDevice {
    fn sleep(&mut self) -> anyhow::Result<()> {
        // If the device is already asleep, we should not request it to sleep again.
        if self.sleep_state.is_some() {
            return Ok(());
        }

        if let Some(queues) = self.device.virtio_sleep()? {
            anyhow::ensure!(
                self.device_activated,
                format!(
                    "unactivated device {} returned queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Active {
                activated_queues: queues,
            });
        } else {
            anyhow::ensure!(
                !self.device_activated,
                format!(
                    "activated device {} didn't return queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Inactive);
        }
        Ok(())
    }

    fn wake(&mut self) -> anyhow::Result<()> {
        match self.sleep_state.take() {
            None => {
                // If the device is already awake, we should not request it to wake again.
            }
            Some(SleepState::Inactive) => {
                self.device.virtio_wake(None).with_context(|| {
                    format!(
                        "virtio_wake failed for {}, can't recover",
                        self.debug_label(),
                    )
                })?;
            }
            Some(SleepState::Active { activated_queues }) => {
                self.device
                    .virtio_wake(Some((
                        self.mem.clone(),
                        self.interrupt
                            .clone()
                            .expect("interrupt missing for already active queues"),
                        activated_queues,
                    )))
                    .with_context(|| {
                        format!(
                            "virtio_wake failed for {}, can't recover",
                            self.debug_label(),
                        )
                    })?;
            }
        };
        Ok(())
    }

    fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        if self.iommu.is_some() {
            return Err(anyhow!("Cannot snapshot if iommu is present."));
        }

        AnySnapshot::to_any(VirtioPciDeviceSnapshot {
            config_regs: self.config_regs.snapshot()?,
            inner_device: self.device.virtio_snapshot()?,
            device_activated: self.device_activated,
            interrupt: self.interrupt.as_ref().map(|i| i.snapshot()),
            msix_config: self.msix_config.lock().snapshot()?,
            common_config: self.common_config,
            queues: self
                .queues
                .iter()
                .map(|q| q.snapshot())
                .collect::<anyhow::Result<Vec<_>>>()?,
            activated_queues: match &self.sleep_state {
                None => {
                    anyhow::bail!("tried snapshotting while awake")
                }
                Some(SleepState::Inactive) => None,
                Some(SleepState::Active { activated_queues }) => {
                    let mut serialized_queues = Vec::new();
                    for (index, queue) in activated_queues.iter() {
                        serialized_queues.push((*index, queue.snapshot()?));
                    }
                    Some(serialized_queues)
                }
            },
        })
        .context("failed to serialize VirtioPciDeviceSnapshot")
    }

    fn restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
        // Restoring from an activated state is more complex and low priority, so just fail for
        // now. We'll need to reset the device before restoring, e.g. must call
        // self.unregister_ioevents().
        anyhow::ensure!(
            !self.device_activated,
            "tried to restore after virtio device activated. not supported yet"
        );

        let deser: VirtioPciDeviceSnapshot = AnySnapshot::from_any(data)?;

        self.config_regs.restore(deser.config_regs)?;
        self.device_activated = deser.device_activated;

        self.msix_config.lock().restore(deser.msix_config)?;
        self.common_config = deser.common_config;

        // Restore the interrupt. This must be done after restoring the MSI-X configuration, but
        // before restoring the queues.
        if let Some(deser_interrupt) = deser.interrupt {
            let interrupt = Interrupt::new_from_snapshot(
                self.interrupt_evt
                    .as_ref()
                    .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                    .try_clone()
                    .with_context(|| {
                        format!("{} failed to clone interrupt_evt", self.debug_label())
                    })?,
                Some(self.msix_config.clone()),
                self.common_config.msix_config,
                deser_interrupt,
                #[cfg(target_arch = "x86_64")]
                Some((
                    PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                    MetricEventType::VirtioWakeup {
                        virtio_id: self.device.device_type() as u32,
                    },
                )),
            );
            self.interrupt_resample_worker = interrupt.spawn_resample_thread();
            self.interrupt = Some(interrupt);
        }

        assert_eq!(
            self.queues.len(),
            deser.queues.len(),
            "device must have the same number of queues"
        );
        for (q, s) in self.queues.iter_mut().zip(deser.queues.into_iter()) {
            q.restore(s)?;
        }

        // Verify we are asleep and inactive.
        match &self.sleep_state {
            None => {
                anyhow::bail!("tried restoring while awake")
            }
            Some(SleepState::Inactive) => {}
            Some(SleepState::Active { .. }) => {
                anyhow::bail!("tried to restore after virtio device activated. not supported yet")
            }
        };
        // Restore `sleep_state`.
        if let Some(activated_queues_snapshot) = deser.activated_queues {
            let interrupt = self
                .interrupt
                .as_ref()
                .context("tried to restore active queues without an interrupt")?;
            let mut activated_queues = BTreeMap::new();
            for (index, queue_snapshot) in activated_queues_snapshot {
                let queue_config = self
                    .queues
                    .get(index)
                    .with_context(|| format!("missing queue config for activated queue {index}"))?;
                let queue_evt = self
                    .queue_evts
                    .get(index)
                    .with_context(|| format!("missing queue event for activated queue {index}"))?
                    .event
                    .try_clone()
                    .context("failed to clone queue event")?;
                activated_queues.insert(
                    index,
                    Queue::restore(
                        queue_config,
                        queue_snapshot,
                        &self.mem,
                        queue_evt,
                        interrupt.clone(),
                    )?,
                );
            }

            // Restore the activated queues.
            self.sleep_state = Some(SleepState::Active { activated_queues });
        } else {
            self.sleep_state = Some(SleepState::Inactive);
        }

        // Call register_io_events for the activated queue events.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;
        self.queues
            .iter()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .try_for_each(|((queue_index, _queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                Ok::<(), anyhow::Error>(())
            })?;

        // There might be data in the queue that wasn't drained by the device
        // at the time it was snapshotted. In this case, the doorbell should
        // still be signaled. If it is not, the driver may never re-trigger the
        // doorbell, and the device will stall. So here, we explicitly signal
        // every doorbell. Spurious doorbells are safe (devices will check their
        // queue, realize nothing is there, and go back to sleep).
        self.queue_evts.iter_mut().try_for_each(|queue_event| {
            queue_event
                .event
                .signal()
                .context("failed to wake doorbell")
        })?;

        self.device.virtio_restore(deser.inner_device)?;

        Ok(())
    }
}

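/// Forwards a device's shared-memory map/unmap requests to the VMM over a
/// `VmMemoryClient`, tracking live mappings by their offset within the BAR.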
struct VmRequester {
    vm_memory_client: VmMemoryClient,
    alloc: Alloc,
    mappings: BTreeMap<u64, VmMemoryRegionId>,
    prepare_type: SharedMemoryPrepareType,
    prepared: bool,
}

impl VmRequester {
    fn new(
        vm_memory_client: VmMemoryClient,
        alloc: Alloc,
        prepare_type: SharedMemoryPrepareType,
    ) -> Self {
        Self {
            vm_memory_client,
            alloc,
            mappings: BTreeMap::new(),
            prepare_type,
            prepared: false,
        }
    }
}

impl SharedMemoryMapper for VmRequester {
    fn add_mapping(
        &mut self,
        source: VmMemorySource,
        offset: u64,
        prot: Protection,
        cache: MemCacheType,
    ) -> anyhow::Result<()> {
        if !self.prepared {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(prepare_cache_type) =
                self.prepare_type
            {
                debug!(
                    "lazy prepare_shared_memory_region with {:?}",
                    prepare_cache_type
                );
                self.vm_memory_client
                    .prepare_shared_memory_region(self.alloc, prepare_cache_type)
                    .context("lazy prepare_shared_memory_region failed")?;
            }
            self.prepared = true;
        }

        // Devices must implement VirtioDevice::get_shared_memory_prepare_type(), returning
        // SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheNonCoherent), in
        // order to add any mapping that requests MemCacheType::CacheNonCoherent.
        if cache == MemCacheType::CacheNonCoherent {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheCoherent) =
                self.prepare_type
            {
                error!("invalid request to map with CacheNonCoherent for device with prepared CacheCoherent memory");
                return Err(anyhow!("invalid MemCacheType"));
            }
        }

        let id = self
            .vm_memory_client
            .register_memory(
                source,
                VmMemoryDestination::ExistingAllocation {
                    allocation: self.alloc,
                    offset,
                },
                prot,
                cache,
            )
            .context("register_memory failed")?;

        self.mappings.insert(offset, id);
        Ok(())
    }

    fn remove_mapping(&mut self, offset: u64) -> anyhow::Result<()> {
        let id = self.mappings.remove(&offset).context("invalid offset")?;
        self.vm_memory_client
            .unregister_memory(id)
            .context("unregister_memory failed")
    }

    fn as_raw_descriptor(&self) -> Option<RawDescriptor> {
        Some(self.vm_memory_client.as_raw_descriptor())
    }
}

#[cfg(test)]
mod tests {

    #[cfg(feature = "pci-hotplug")]
    #[test]
    fn allocate_aligned_address() {
        let mut simple_allocator = super::SimpleAllocator::new(0);
        // Start at 0, aligned to 0x80. Interval ends at 0x20.
        assert_eq!(simple_allocator.alloc(0x20, 0x80).unwrap(), 0);
        // 0x20 => start at 0x40. Interval ends at 0x80.
        assert_eq!(simple_allocator.alloc(0x40, 0x40).unwrap(), 0x40);
        // 0x80 => start at 0x80. Interval ends at 0x108.
        assert_eq!(simple_allocator.alloc(0x88, 0x80).unwrap(), 0x80);
        // 0x108 => start at 0x180. Interval ends at 0x1b0.
        assert_eq!(simple_allocator.alloc(0x30, 0x80).unwrap(), 0x180);
    }
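
    // A minimal sketch verifying the 64-bit offset/length split performed by
    // VirtioPciShmCap::new() above (low dwords in `cap`, high dwords in the
    // `*_hi` fields). The values here are illustrative, not from the spec.
    #[test]
    fn shm_cap_splits_64bit_fields() {
        let cap = super::VirtioPciShmCap::new(
            super::PciCapabilityType::SharedMemoryConfig,
            2,             // bar
            0x1_2345_6789, // offset: hi dword 0x1, lo dword 0x2345_6789
            0x2_0000_0000, // length: hi dword 0x2, lo dword 0x0
            0,             // shmid
        );
        assert_eq!(u32::from(cap.cap.offset), 0x2345_6789);
        assert_eq!(u32::from(cap.offset_hi), 0x1);
        assert_eq!(u32::from(cap.cap.length), 0);
        assert_eq!(u32::from(cap.length_hi), 0x2);
    }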
}