• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::BTreeMap;
6 use std::sync::Arc;
7 
8 #[cfg(target_arch = "x86_64")]
9 use acpi_tables::sdt::SDT;
10 use anyhow::anyhow;
11 use anyhow::Context;
12 use base::debug;
13 use base::error;
14 use base::trace;
15 use base::AsRawDescriptor;
16 use base::AsRawDescriptors;
17 use base::Event;
18 use base::Protection;
19 use base::RawDescriptor;
20 use base::Result;
21 use base::SharedMemory;
22 use base::Tube;
23 use base::WorkerThread;
24 use data_model::Le32;
25 use hypervisor::Datamatch;
26 use hypervisor::MemCacheType;
27 use libc::ERANGE;
28 #[cfg(target_arch = "x86_64")]
29 use metrics::MetricEventType;
30 use resources::AddressRange;
31 use resources::Alloc;
32 use resources::AllocOptions;
33 use resources::SystemAllocator;
34 use serde::Deserialize;
35 use serde::Serialize;
36 use snapshot::AnySnapshot;
37 use sync::Mutex;
38 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE;
39 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER;
40 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER_OK;
41 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FAILED;
42 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FEATURES_OK;
43 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_NEEDS_RESET;
44 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_SUSPEND;
45 use vm_control::api::VmMemoryClient;
46 use vm_control::VmMemoryDestination;
47 use vm_control::VmMemoryRegionId;
48 use vm_control::VmMemorySource;
49 use vm_memory::GuestMemory;
50 use zerocopy::FromBytes;
51 use zerocopy::Immutable;
52 use zerocopy::IntoBytes;
53 use zerocopy::KnownLayout;
54 
55 use self::virtio_pci_common_config::VirtioPciCommonConfig;
56 use super::*;
57 #[cfg(target_arch = "x86_64")]
58 use crate::acpi::PmWakeupEvent;
59 #[cfg(target_arch = "x86_64")]
60 use crate::pci::pm::PciDevicePower;
61 use crate::pci::pm::PciPmCap;
62 use crate::pci::pm::PmConfig;
63 use crate::pci::pm::PmStatusChange;
64 use crate::pci::BarRange;
65 use crate::pci::MsixCap;
66 use crate::pci::MsixConfig;
67 use crate::pci::MsixStatus;
68 use crate::pci::PciAddress;
69 use crate::pci::PciBarConfiguration;
70 use crate::pci::PciBarIndex;
71 use crate::pci::PciBarPrefetchable;
72 use crate::pci::PciBarRegionType;
73 use crate::pci::PciBaseSystemPeripheralSubclass;
74 use crate::pci::PciCapability;
75 use crate::pci::PciCapabilityID;
76 use crate::pci::PciClassCode;
77 use crate::pci::PciConfiguration;
78 use crate::pci::PciDevice;
79 use crate::pci::PciDeviceError;
80 use crate::pci::PciDisplaySubclass;
81 use crate::pci::PciHeaderType;
82 use crate::pci::PciId;
83 use crate::pci::PciInputDeviceSubclass;
84 use crate::pci::PciInterruptPin;
85 use crate::pci::PciMassStorageSubclass;
86 use crate::pci::PciMultimediaSubclass;
87 use crate::pci::PciNetworkControllerSubclass;
88 use crate::pci::PciSimpleCommunicationControllerSubclass;
89 use crate::pci::PciSubclass;
90 use crate::pci::PciWirelessControllerSubclass;
91 use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
92 #[cfg(feature = "pci-hotplug")]
93 use crate::HotPluggable;
94 use crate::IrqLevelEvent;
95 use crate::Suspendable;
96 
/// Type of a virtio vendor-specific PCI capability, stored in the `cfg_type`
/// field of [`VirtioPciCap`].
#[repr(u8)]
#[derive(Debug, Copy, Clone, enumn::N)]
pub enum PciCapabilityType {
    /// Common configuration structure.
    CommonConfig = 1,
    /// Queue notification (doorbell) region.
    NotifyConfig = 2,
    /// ISR status byte.
    IsrConfig = 3,
    /// Device-specific configuration.
    DeviceConfig = 4,
    /// PCI configuration access capability.
    PciConfig = 5,
    // Doorbell, Notification and SharedMemory are Virtio Vhost User related PCI
    // capabilities. Specified in 5.7.7.4 here
    // https://stefanha.github.io/virtio/vhost-user-slave.html#x1-2830007.
    DoorbellConfig = 6,
    NotificationConfig = 7,
    SharedMemoryConfig = 8,
}
112 
/// Common header shared by all virtio vendor-specific PCI capabilities
/// (`struct virtio_pci_cap` in the virtio specification). It locates one of
/// the virtio configuration structures: a BAR number plus an offset/length
/// within that BAR.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciCap {
    // cap_vndr and cap_next are autofilled based on id() in pci configuration
    pub cap_vndr: u8, // Generic PCI field: PCI_CAP_ID_VNDR
    pub cap_next: u8, // Generic PCI field: next ptr
    pub cap_len: u8,  // Generic PCI field: capability length
    pub cfg_type: u8, // Identifies the structure.
    pub bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type
    padding: [u8; 2], // Pad to full dword.
    pub offset: Le32, // Offset within bar.
    pub length: Le32, // Length of the structure, in bytes.
}
128 
129 impl PciCapability for VirtioPciCap {
bytes(&self) -> &[u8]130     fn bytes(&self) -> &[u8] {
131         self.as_bytes()
132     }
133 
id(&self) -> PciCapabilityID134     fn id(&self) -> PciCapabilityID {
135         PciCapabilityID::VendorSpecific
136     }
137 
writable_bits(&self) -> Vec<u32>138     fn writable_bits(&self) -> Vec<u32> {
139         vec![0u32; 4]
140     }
141 }
142 
143 impl VirtioPciCap {
new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self144     pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self {
145         VirtioPciCap {
146             cap_vndr: 0,
147             cap_next: 0,
148             cap_len: std::mem::size_of::<VirtioPciCap>() as u8,
149             cfg_type: cfg_type as u8,
150             bar,
151             id: 0,
152             padding: [0; 2],
153             offset: Le32::from(offset),
154             length: Le32::from(length),
155         }
156     }
157 
set_cap_len(&mut self, cap_len: u8)158     pub fn set_cap_len(&mut self, cap_len: u8) {
159         self.cap_len = cap_len;
160     }
161 }
162 
/// Vendor capability for the queue notification region: the common header
/// plus the multiplier applied to a queue's notify offset to locate its
/// doorbell within the region.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciNotifyCap {
    cap: VirtioPciCap,           // Common header (cfg_type = NotifyConfig).
    notify_off_multiplier: Le32, // Multiplier for a queue's notify_off.
}
170 
171 impl PciCapability for VirtioPciNotifyCap {
bytes(&self) -> &[u8]172     fn bytes(&self) -> &[u8] {
173         self.as_bytes()
174     }
175 
id(&self) -> PciCapabilityID176     fn id(&self) -> PciCapabilityID {
177         PciCapabilityID::VendorSpecific
178     }
179 
writable_bits(&self) -> Vec<u32>180     fn writable_bits(&self) -> Vec<u32> {
181         vec![0u32; 5]
182     }
183 }
184 
185 impl VirtioPciNotifyCap {
new( cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32, multiplier: Le32, ) -> Self186     pub fn new(
187         cfg_type: PciCapabilityType,
188         bar: u8,
189         offset: u32,
190         length: u32,
191         multiplier: Le32,
192     ) -> Self {
193         VirtioPciNotifyCap {
194             cap: VirtioPciCap {
195                 cap_vndr: 0,
196                 cap_next: 0,
197                 cap_len: std::mem::size_of::<VirtioPciNotifyCap>() as u8,
198                 cfg_type: cfg_type as u8,
199                 bar,
200                 id: 0,
201                 padding: [0; 2],
202                 offset: Le32::from(offset),
203                 length: Le32::from(length),
204             },
205             notify_off_multiplier: multiplier,
206         }
207     }
208 }
209 
/// Vendor capability describing a shared memory region. Extends the 32-bit
/// offset/length fields of the common header with high dwords so that full
/// 64-bit values can be expressed.
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciShmCap {
    cap: VirtioPciCap,
    offset_hi: Le32, // Most sig 32 bits of offset
    length_hi: Le32, // Most sig 32 bits of length
}
217 
218 impl PciCapability for VirtioPciShmCap {
bytes(&self) -> &[u8]219     fn bytes(&self) -> &[u8] {
220         self.as_bytes()
221     }
222 
id(&self) -> PciCapabilityID223     fn id(&self) -> PciCapabilityID {
224         PciCapabilityID::VendorSpecific
225     }
226 
writable_bits(&self) -> Vec<u32>227     fn writable_bits(&self) -> Vec<u32> {
228         vec![0u32; 6]
229     }
230 }
231 
232 impl VirtioPciShmCap {
new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self233     pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self {
234         VirtioPciShmCap {
235             cap: VirtioPciCap {
236                 cap_vndr: 0,
237                 cap_next: 0,
238                 cap_len: std::mem::size_of::<VirtioPciShmCap>() as u8,
239                 cfg_type: cfg_type as u8,
240                 bar,
241                 id: shmid,
242                 padding: [0; 2],
243                 offset: Le32::from(offset as u32),
244                 length: Le32::from(length as u32),
245             },
246             offset_hi: Le32::from((offset >> 32) as u32),
247             length_hi: Le32::from((length >> 32) as u32),
248         }
249     }
250 }
251 
// Allocate one bar for the structs pointed to by the capability structures.
// Layout of the settings BAR (CAPABILITY_BAR_SIZE bytes total):
//   0x0000..0x0038  common configuration
//   0x1000..0x1001  ISR status byte
//   0x2000..0x3000  device-specific configuration
//   0x3000..0x4000  queue notifications
//   0x6000..0x7000  MSI-X table
//   0x7000..0x8000  MSI-X PBA
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const COMMON_CONFIG_LAST: u64 = COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE - 1;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x1000;
const ISR_CONFIG_SIZE: u64 = 1;
const ISR_CONFIG_LAST: u64 = ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE - 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const DEVICE_CONFIG_LAST: u64 = DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE - 1;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const NOTIFICATION_LAST: u64 = NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE - 1;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_TABLE_LAST: u64 = MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE - 1;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const MSIX_PBA_LAST: u64 = MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE - 1;
const CAPABILITY_BAR_SIZE: u64 = 0x8000;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
const VIRTIO_PCI_REVISION_ID: u8 = 1;

// BAR 0 holds the capability structures above; BAR 2 holds shared memory
// regions, when the device exposes any.
const CAPABILITIES_BAR_NUM: usize = 0;
const SHMEM_BAR_NUM: usize = 2;
281 
/// A queue's notify event, plus whether it is currently registered as an
/// ioevent with the hypervisor (see `activate`/`unregister_ioevents`).
struct QueueEvent {
    event: Event,
    ioevent_registered: bool,
}
286 
/// Implements the
/// [PCI](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-650001)
/// transport for virtio devices.
pub struct VirtioPciDevice {
    // PCI configuration space registers (header, BARs, capabilities).
    config_regs: PciConfiguration,
    // Address requested by the wrapped device; reserved in `allocate_address`.
    preferred_address: Option<PciAddress>,
    // Address actually assigned by `allocate_address`, if any.
    pci_address: Option<PciAddress>,

    // The wrapped virtio device implementation.
    device: Box<dyn VirtioDevice>,
    // Set once `activate` has successfully activated `device`.
    device_activated: bool,
    // If true, legacy INTx is not wired into config space (`assign_irq`).
    disable_intx: bool,

    // Transport interrupt, created during `activate`.
    interrupt: Option<Interrupt>,
    // Level-triggered interrupt event supplied via `assign_irq`.
    interrupt_evt: Option<IrqLevelEvent>,
    // Resample worker spawned from the interrupt in `activate`, if any.
    interrupt_resample_worker: Option<WorkerThread<()>>,

    // Per-queue configuration written by the guest driver; paired 1:1 with
    // `queue_evts`.
    queues: Vec<QueueConfig>,
    queue_evts: Vec<QueueEvent>,
    mem: GuestMemory,
    // Index of the BAR holding the virtio capability structures.
    settings_bar: PciBarIndex,
    msix_config: Arc<Mutex<MsixConfig>>,
    pm_config: Arc<Mutex<PmConfig>>,
    common_config: VirtioPciCommonConfig,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // API client that is present if the device has shared memory regions, and
    // is used to map/unmap files into the shared memory region.
    shared_memory_vm_memory_client: Option<VmMemoryClient>,

    // API client for registration of ioevents when PCI BAR reprogramming is detected.
    ioevent_vm_memory_client: VmMemoryClient,

    // State only present while asleep.
    sleep_state: Option<SleepState>,

    vm_control_tube: Arc<Mutex<Tube>>,
}
325 
/// State captured while the device is asleep (see `sleep_state` on
/// `VirtioPciDevice`).
enum SleepState {
    // Asleep and device hasn't been activated yet by the guest.
    Inactive,
    // Asleep and device has been activated by the guest.
    Active {
        /// The queues returned from `VirtioDevice::virtio_sleep`.
        /// Map is from queue index -> Queue.
        activated_queues: BTreeMap<usize, Queue>,
    },
}
336 
/// Serialized form of `VirtioPciDevice` used for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct VirtioPciDeviceSnapshot {
    // PCI configuration space registers.
    config_regs: AnySnapshot,

    // Snapshot of the wrapped `VirtioDevice`.
    inner_device: AnySnapshot,
    device_activated: bool,

    interrupt: Option<InterruptSnapshot>,
    msix_config: AnySnapshot,
    common_config: VirtioPciCommonConfig,

    // `QueueConfig` state for every queue.
    queues: Vec<AnySnapshot>,
    // Present only if the device was activated: (queue index, queue) pairs.
    activated_queues: Option<Vec<(usize, AnySnapshot)>>,
}
351 
impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    ///
    /// `msi_device_tube` backs MSI-X configuration; `ioevent_vm_memory_client`
    /// is used to (un)register queue-notify ioevents; and
    /// `shared_memory_vm_memory_client` must be `Some` exactly when the device
    /// exposes a shared memory region.
    pub fn new(
        mem: GuestMemory,
        device: Box<dyn VirtioDevice>,
        msi_device_tube: Tube,
        disable_intx: bool,
        shared_memory_vm_memory_client: Option<VmMemoryClient>,
        ioevent_vm_memory_client: VmMemoryClient,
        vm_control_tube: Tube,
    ) -> Result<Self> {
        // shared_memory_vm_memory_client is required if there are shared memory regions.
        assert_eq!(
            device.get_shared_memory_region().is_none(),
            shared_memory_vm_memory_client.is_none()
        );

        // One notify Event per queue; none are registered as ioevents yet.
        let mut queue_evts = Vec::new();
        for _ in device.queue_max_sizes() {
            queue_evts.push(QueueEvent {
                event: Event::new()?,
                ioevent_registered: false,
            });
        }
        let queues = device
            .queue_max_sizes()
            .iter()
            .map(|&s| QueueConfig::new(s, device.features()))
            .collect();

        // Virtio PCI device IDs are the base ID plus the virtio device type.
        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

        // Map the virtio device type onto the PCI class/subclass advertised in
        // configuration space.
        let (pci_device_class, pci_device_subclass) = match device.device_type() {
            DeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Console => (
                PciClassCode::SimpleCommunicationController,
                &PciSimpleCommunicationControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Rng => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Balloon => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Scsi => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Scsi as &dyn PciSubclass,
            ),
            DeviceType::P9 => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Gpu => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Input => (
                PciClassCode::InputDevice,
                &PciInputDeviceSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Vsock => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Iommu => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Iommu as &dyn PciSubclass,
            ),
            DeviceType::Sound => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::AudioController as &dyn PciSubclass,
            ),
            DeviceType::Fs => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pmem => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::NonVolatileMemory as &dyn PciSubclass,
            ),
            DeviceType::Mac80211HwSim => (
                PciClassCode::WirelessController,
                &PciWirelessControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::VideoEncoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::VideoDecoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Media => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Scmi => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Wl => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Tpm => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pvclock => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
        };

        let num_interrupts = device.num_interrupts();

        // One MSI-X vector per queue plus one for configuration changes.
        let msix_num = u16::try_from(num_interrupts + 1).map_err(|_| base::Error::new(ERANGE))?;
        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_num,
            msi_device_tube,
            PciId::new(VIRTIO_PCI_VENDOR_ID, pci_device_id).into(),
            device.debug_label(),
        )));

        let config_regs = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            pci_device_class,
            pci_device_subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            VIRTIO_PCI_REVISION_ID,
        );

        Ok(VirtioPciDevice {
            config_regs,
            preferred_address: device.pci_address(),
            pci_address: None,
            device,
            device_activated: false,
            disable_intx,
            interrupt: None,
            interrupt_evt: None,
            interrupt_resample_worker: None,
            queues,
            queue_evts,
            mem,
            settings_bar: 0,
            msix_config,
            pm_config: Arc::new(Mutex::new(PmConfig::new(true))),
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: VIRTIO_MSI_NO_VECTOR,
            },
            iommu: None,
            shared_memory_vm_memory_client,
            ioevent_vm_memory_client,
            sleep_state: None,
            vm_control_tube: Arc::new(Mutex::new(vm_control_tube)),
        })
    }

    /// True once the driver has set ACKNOWLEDGE, DRIVER, DRIVER_OK and
    /// FEATURES_OK without FAILED, i.e. the device may be activated.
    fn is_driver_ready(&self) -> bool {
        let ready_bits = (VIRTIO_CONFIG_S_ACKNOWLEDGE
            | VIRTIO_CONFIG_S_DRIVER
            | VIRTIO_CONFIG_S_DRIVER_OK
            | VIRTIO_CONFIG_S_FEATURES_OK) as u8;
        (self.common_config.driver_status & ready_bits) == ready_bits
            && self.common_config.driver_status & VIRTIO_CONFIG_S_FAILED as u8 == 0
    }

    /// True if the driver has set the SUSPEND status bit.
    fn is_device_suspended(&self) -> bool {
        (self.common_config.driver_status & VIRTIO_CONFIG_S_SUSPEND as u8) != 0
    }

    /// Determines if the driver has requested the device reset itself
    fn is_reset_requested(&self) -> bool {
        self.common_config.driver_status == DEVICE_RESET as u8
    }

    /// Adds the virtio transport capabilities (common / ISR / device / notify
    /// / PCI config), the MSI-X capability, and the power-management
    /// capability, all pointing into `settings_bar`; records the BAR index in
    /// `self.settings_bar`.
    fn add_settings_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&common_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&isr_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&device_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.config_regs
            .add_capability(&notify_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        //TODO(dgreid) - How will the configuration_cap work?
        let configuration_cap = VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0);
        self.config_regs
            .add_capability(&configuration_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let msix_cap = MsixCap::new(
            settings_bar,
            self.msix_config.lock().num_vectors(),
            MSIX_TABLE_BAR_OFFSET as u32,
            settings_bar,
            MSIX_PBA_BAR_OFFSET as u32,
        );
        self.config_regs
            .add_capability(&msix_cap, Some(Box::new(self.msix_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.config_regs
            .add_capability(&PciPmCap::new(), Some(Box::new(self.pm_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.settings_bar = settings_bar as PciBarIndex;
        Ok(())
    }

    /// Activates the underlying `VirtioDevice`. `assign_irq` has to be called first.
    fn activate(&mut self) -> anyhow::Result<()> {
        // Build the transport-level Interrupt from the assigned IRQ event and
        // the MSI-X config (plus PM wakeup metrics on x86_64).
        let interrupt = Interrupt::new(
            self.interrupt_evt
                .as_ref()
                .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                .try_clone()
                .with_context(|| format!("{} failed to clone interrupt_evt", self.debug_label()))?,
            Some(self.msix_config.clone()),
            self.common_config.msix_config,
            #[cfg(target_arch = "x86_64")]
            Some((
                PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                MetricEventType::VirtioWakeup {
                    virtio_id: self.device.device_type() as u32,
                },
            )),
        );
        self.interrupt = Some(interrupt.clone());
        self.interrupt_resample_worker = interrupt.spawn_resample_thread();

        // Notify addresses live in the settings BAR at a fixed offset, one
        // doorbell per queue spaced by NOTIFY_OFF_MULTIPLIER.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        // Use ready queues and their events.
        let queues = self
            .queues
            .iter_mut()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .map(|((queue_index, queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                let queue_evt = evt.event.try_clone().context("failed to clone queue_evt")?;
                Ok((
                    queue_index,
                    queue
                        .activate(&self.mem, queue_evt, interrupt.clone())
                        .context("failed to activate queue")?,
                ))
            })
            .collect::<anyhow::Result<BTreeMap<usize, Queue>>>()?;

        // An activation failure is reported to the guest via NEEDS_RESET
        // rather than returned as an error from this function.
        if let Err(e) = self.device.activate(self.mem.clone(), interrupt, queues) {
            error!("{} activate failed: {:#}", self.debug_label(), e);
            self.common_config.driver_status |= VIRTIO_CONFIG_S_NEEDS_RESET as u8;
        } else {
            self.device_activated = true;
        }

        Ok(())
    }

    /// Unregisters every queue-notify ioevent previously registered with the
    /// hypervisor (the inverse of the registration done in `activate`).
    fn unregister_ioevents(&mut self) -> anyhow::Result<()> {
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        for (queue_index, evt) in self.queue_evts.iter_mut().enumerate() {
            if evt.ioevent_registered {
                self.ioevent_vm_memory_client
                    .unregister_io_event(
                        evt.event.try_clone().context("failed to clone Event")?,
                        notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                        Datamatch::AnyLength,
                    )
                    .context("failed to unregister ioevent")?;
                evt.ioevent_registered = false;
            }
        }
        Ok(())
    }

    /// Returns a reference to the wrapped virtio device.
    pub fn virtio_device(&self) -> &dyn VirtioDevice {
        self.device.as_ref()
    }

    /// PCI address assigned by `allocate_address`, if any.
    pub fn pci_address(&self) -> Option<PciAddress> {
        self.pci_address
    }

    /// Arms the wakeup event while the device is in D3 and disarms it on
    /// transitions out of D3.
    #[cfg(target_arch = "x86_64")]
    fn handle_pm_status_change(&mut self, status: &PmStatusChange) {
        if let Some(interrupt) = self.interrupt.as_mut() {
            interrupt.set_wakeup_event_active(status.to == PciDevicePower::D3)
        }
    }

    /// No PM wakeup handling on non-x86_64 targets.
    #[cfg(not(target_arch = "x86_64"))]
    fn handle_pm_status_change(&mut self, _status: &PmStatusChange) {}
}
719 
720 impl PciDevice for VirtioPciDevice {
debug_label(&self) -> String721     fn debug_label(&self) -> String {
722         format!("pci{}", self.device.debug_label())
723     }
724 
    /// Preferred PCI address supplied by the underlying virtio device, if any.
    fn preferred_address(&self) -> Option<PciAddress> {
        self.preferred_address
    }
728 
allocate_address( &mut self, resources: &mut SystemAllocator, ) -> std::result::Result<PciAddress, PciDeviceError>729     fn allocate_address(
730         &mut self,
731         resources: &mut SystemAllocator,
732     ) -> std::result::Result<PciAddress, PciDeviceError> {
733         if self.pci_address.is_none() {
734             if let Some(address) = self.preferred_address {
735                 if !resources.reserve_pci(address, self.debug_label()) {
736                     return Err(PciDeviceError::PciAllocationFailed);
737                 }
738                 self.pci_address = Some(address);
739             } else {
740                 self.pci_address = resources.allocate_pci(0, self.debug_label());
741             }
742         }
743         self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
744     }
745 
keep_rds(&self) -> Vec<RawDescriptor>746     fn keep_rds(&self) -> Vec<RawDescriptor> {
747         let mut rds = self.device.keep_rds();
748         rds.extend(
749             self.queue_evts
750                 .iter()
751                 .map(|qe| qe.event.as_raw_descriptor()),
752         );
753         if let Some(interrupt_evt) = &self.interrupt_evt {
754             rds.extend(interrupt_evt.as_raw_descriptors());
755         }
756         let descriptor = self.msix_config.lock().get_msi_socket();
757         rds.push(descriptor);
758         if let Some(iommu) = &self.iommu {
759             rds.append(&mut iommu.lock().as_raw_descriptors());
760         }
761         rds.push(self.ioevent_vm_memory_client.as_raw_descriptor());
762         rds.push(self.vm_control_tube.lock().as_raw_descriptor());
763         rds
764     }
765 
assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32)766     fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
767         self.interrupt_evt = Some(irq_evt);
768         if !self.disable_intx {
769             self.config_regs.set_irq(irq_num as u8, pin);
770         }
771     }
772 
allocate_io_bars( &mut self, resources: &mut SystemAllocator, ) -> std::result::Result<Vec<BarRange>, PciDeviceError>773     fn allocate_io_bars(
774         &mut self,
775         resources: &mut SystemAllocator,
776     ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
777         let device_type = self.device.device_type();
778         allocate_io_bars(
779             self,
780             |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
781                 resources
782                     .allocate_mmio(
783                         size,
784                         alloc,
785                         format!("virtio-{}-cap_bar", device_type),
786                         alloc_option,
787                     )
788                     .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
789             },
790         )
791     }
792 
allocate_device_bars( &mut self, resources: &mut SystemAllocator, ) -> std::result::Result<Vec<BarRange>, PciDeviceError>793     fn allocate_device_bars(
794         &mut self,
795         resources: &mut SystemAllocator,
796     ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
797         let device_type = self.device.device_type();
798         allocate_device_bars(
799             self,
800             |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
801                 resources
802                     .allocate_mmio(
803                         size,
804                         alloc,
805                         format!("virtio-{}-custom_bar", device_type),
806                         alloc_option,
807                     )
808                     .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
809             },
810         )
811     }
812 
destroy_device(&mut self)813     fn destroy_device(&mut self) {
814         if let Err(e) = self.unregister_ioevents() {
815             error!("error destroying {}: {:?}", &self.debug_label(), &e);
816         }
817     }
818 
    /// Returns the configuration of the given BAR index, if one is set up.
    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
        self.config_regs.get_bar_configuration(bar_num)
    }
822 
register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError>823     fn register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> {
824         let mut caps = self.device.get_device_caps();
825         if let Some(region) = self.device.get_shared_memory_region() {
826             caps.push(Box::new(VirtioPciShmCap::new(
827                 PciCapabilityType::SharedMemoryConfig,
828                 SHMEM_BAR_NUM as u8,
829                 0,
830                 region.length,
831                 region.id,
832             )));
833         }
834 
835         for cap in caps {
836             self.config_regs
837                 .add_capability(&*cap, None)
838                 .map_err(PciDeviceError::CapabilitiesSetup)?;
839         }
840 
841         Ok(())
842     }
843 
    /// Reads one 32-bit register from the PCI configuration space.
    fn read_config_register(&self, reg_idx: usize) -> u32 {
        self.config_regs.read_reg(reg_idx)
    }
847 
write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8])848     fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
849         if let Some(res) = self.config_regs.write_reg(reg_idx, offset, data) {
850             if let Some(msix_behavior) = res.downcast_ref::<MsixStatus>() {
851                 self.device.control_notify(*msix_behavior);
852             } else if let Some(status) = res.downcast_ref::<PmStatusChange>() {
853                 self.handle_pm_status_change(status);
854             }
855         }
856     }
857 
setup_pci_config_mapping( &mut self, shmem: &SharedMemory, base: usize, len: usize, ) -> std::result::Result<bool, PciDeviceError>858     fn setup_pci_config_mapping(
859         &mut self,
860         shmem: &SharedMemory,
861         base: usize,
862         len: usize,
863     ) -> std::result::Result<bool, PciDeviceError> {
864         self.config_regs
865             .setup_mapping(shmem, base, len)
866             .map(|_| true)
867             .map_err(PciDeviceError::MmioSetup)
868     }
869 
    /// Handles a guest MMIO read from one of this device's BARs.
    ///
    /// Only reads to the settings BAR are dispatched here, by offset range:
    /// common config, ISR, device config, notification, MSI-X table, and
    /// MSI-X PBA regions.
    fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
        if bar_index == self.settings_bar {
            match offset {
                // Virtio common configuration registers (queue selection,
                // feature negotiation, device status, ...).
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.read(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    // Only the first byte is meaningful; shorter buffers are
                    // left untouched.
                    if let Some(v) = data.get_mut(0) {
                        // Reading this register resets it to 0.
                        *v = if let Some(interrupt) = &self.interrupt {
                            interrupt.read_and_reset_interrupt_status()
                        } else {
                            0
                        };
                    }
                }
                // Device-type-specific configuration space.
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .read_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Handled with ioevents.
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    self.msix_config
                        .lock()
                        .read_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .read_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                // Reads outside any known region are ignored.
                _ => (),
            }
        }
    }
910 
    /// Handles a guest MMIO write to one of this device's BARs.
    ///
    /// After dispatching the write by offset range, this also performs the
    /// state transitions a write can trigger: device activation once the
    /// driver is ready, suspend-state propagation to the interrupt object,
    /// and a full device/queue reset when the driver requests one.
    fn write_bar(&mut self, bar_index: usize, offset: u64, data: &[u8]) {
        // Sampled before the write so a suspend-state transition caused by
        // this write can be detected below.
        let was_suspended = self.is_device_suspended();

        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.write(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    // Writing a byte clears the corresponding ISR status bits.
                    if let Some(v) = data.first() {
                        if let Some(interrupt) = &self.interrupt {
                            interrupt.clear_interrupt_status_bits(*v);
                        }
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .write_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Notifications are normally handled with ioevents inside the hypervisor and
                    // do not reach write_bar(). However, if the ioevent registration hasn't
                    // finished yet, it is possible for a write to the notification region to make
                    // it through as a normal MMIO exit and end up here. To handle that case,
                    // provide a fallback that looks up the corresponding queue for the offset and
                    // triggers its event, which is equivalent to what the ioevent would do.
                    let queue_index = (offset - NOTIFICATION_BAR_OFFSET) as usize
                        / NOTIFY_OFF_MULTIPLIER as usize;
                    trace!("write_bar notification fallback for queue {}", queue_index);
                    if let Some(evt) = self.queue_evts.get(queue_index) {
                        let _ = evt.event.signal();
                    }
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    // An MSI-X table write may change masking state; notify
                    // the device so it can react.
                    let behavior = self
                        .msix_config
                        .lock()
                        .write_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                    self.device.control_notify(behavior);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .write_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        }

        // Activate the device once the driver signals readiness.
        if !self.device_activated && self.is_driver_ready() {
            if let Err(e) = self.activate() {
                error!("failed to activate device: {:#}", e);
            }
        }

        // Propagate a suspend-state change caused by this write.
        let is_suspended = self.is_device_suspended();
        if is_suspended != was_suspended {
            if let Some(interrupt) = self.interrupt.as_mut() {
                interrupt.set_suspended(is_suspended);
            }
        }

        // Device has been reset by the driver
        if self.device_activated && self.is_reset_requested() {
            if let Err(e) = self.device.reset() {
                error!("failed to reset {} device: {:#}", self.debug_label(), e);
            } else {
                self.device_activated = false;
                // reset queues
                self.queues.iter_mut().for_each(QueueConfig::reset);
                // select queue 0 by default
                self.common_config.queue_select = 0;
                if let Err(e) = self.unregister_ioevents() {
                    error!("failed to unregister ioevents: {:#}", e);
                }
                if let Some(interrupt_resample_worker) = self.interrupt_resample_worker.take() {
                    interrupt_resample_worker.stop();
                }
            }
        }
    }
995 
    /// Forwards the post-sandboxing notification to the inner virtio device.
    fn on_device_sandboxed(&mut self) {
        self.device.on_device_sandboxed();
    }
999 
1000     #[cfg(target_arch = "x86_64")]
generate_acpi(&mut self, sdts: &mut Vec<SDT>)1001     fn generate_acpi(&mut self, sdts: &mut Vec<SDT>) {
1002         self.device.generate_acpi(
1003             self.pci_address.expect("pci_address must be assigned"),
1004             sdts,
1005         )
1006     }
1007 
    /// Identifies this device as a `VirtioPciDevice`, returning a reference
    /// to itself.
    fn as_virtio_pci_device(&self) -> Option<&VirtioPciDevice> {
        Some(self)
    }
1011 }
1012 
allocate_io_bars<F>( virtio_pci_device: &mut VirtioPciDevice, mut alloc_fn: F, ) -> std::result::Result<Vec<BarRange>, PciDeviceError> where F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,1013 fn allocate_io_bars<F>(
1014     virtio_pci_device: &mut VirtioPciDevice,
1015     mut alloc_fn: F,
1016 ) -> std::result::Result<Vec<BarRange>, PciDeviceError>
1017 where
1018     F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
1019 {
1020     let address = virtio_pci_device
1021         .pci_address
1022         .expect("allocate_address must be called prior to allocate_io_bars");
1023     // Allocate one bar for the structures pointed to by the capability structures.
1024     let settings_config_addr = alloc_fn(
1025         CAPABILITY_BAR_SIZE,
1026         Alloc::PciBar {
1027             bus: address.bus,
1028             dev: address.dev,
1029             func: address.func,
1030             bar: 0,
1031         },
1032         AllocOptions::new()
1033             .max_address(u32::MAX.into())
1034             .align(CAPABILITY_BAR_SIZE),
1035     )?;
1036     let config = PciBarConfiguration::new(
1037         CAPABILITIES_BAR_NUM,
1038         CAPABILITY_BAR_SIZE,
1039         PciBarRegionType::Memory32BitRegion,
1040         PciBarPrefetchable::NotPrefetchable,
1041     )
1042     .set_address(settings_config_addr);
1043     let settings_bar = virtio_pci_device
1044         .config_regs
1045         .add_pci_bar(config)
1046         .map_err(|e| PciDeviceError::IoRegistrationFailed(settings_config_addr, e))?
1047         as u8;
1048     // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
1049     virtio_pci_device.add_settings_pci_capabilities(settings_bar)?;
1050 
1051     Ok(vec![BarRange {
1052         addr: settings_config_addr,
1053         size: CAPABILITY_BAR_SIZE,
1054         prefetchable: false,
1055     }])
1056 }
1057 
/// Allocates the device-specific BARs, or — when the device exposes a shared
/// memory region instead of custom BARs — a single prefetchable 64-bit shmem
/// BAR, wiring up the shared-memory mapper as a side effect.
///
/// `alloc_fn` performs the actual address allocation, letting static devices
/// use the system allocator and hotplug devices use a placeholder layout.
fn allocate_device_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_device_bars");

    let configs = virtio_pci_device.device.get_device_bars(address);
    let configs = if !configs.is_empty() {
        configs
    } else {
        // No custom BARs: synthesize one for the shared memory region, if
        // the device has one; otherwise there is nothing to allocate.
        let region = match virtio_pci_device.device.get_shared_memory_region() {
            None => return Ok(Vec::new()),
            Some(r) => r,
        };
        // BAR sizes must be powers of two.
        let config = PciBarConfiguration::new(
            SHMEM_BAR_NUM,
            region
                .length
                .checked_next_power_of_two()
                .expect("bar too large"),
            PciBarRegionType::Memory64BitRegion,
            PciBarPrefetchable::Prefetchable,
        );

        let alloc = Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: config.bar_index() as u8,
        };

        // take() ensures the client is handed to the mapper exactly once.
        let vm_memory_client = virtio_pci_device
            .shared_memory_vm_memory_client
            .take()
            .expect("missing shared_memory_tube");

        // See comment VmMemoryRequest::execute
        let can_prepare = !virtio_pci_device
            .device
            .expose_shmem_descriptors_with_viommu();
        let prepare_type = if can_prepare {
            virtio_pci_device.device.get_shared_memory_prepare_type()
        } else {
            SharedMemoryPrepareType::DynamicPerMapping
        };

        let vm_requester = Box::new(VmRequester::new(vm_memory_client, alloc, prepare_type));
        virtio_pci_device
            .device
            .set_shared_memory_mapper(vm_requester);

        vec![config]
    };
    let mut ranges = vec![];
    for config in configs {
        let device_addr = alloc_fn(
            config.size(),
            Alloc::PciBar {
                bus: address.bus,
                dev: address.dev,
                func: address.func,
                bar: config.bar_index() as u8,
            },
            AllocOptions::new()
                .prefetchable(config.is_prefetchable())
                .align(config.size()),
        )?;
        let config = config.set_address(device_addr);
        let _device_bar = virtio_pci_device
            .config_regs
            .add_pci_bar(config)
            .map_err(|e| PciDeviceError::IoRegistrationFailed(device_addr, e))?;
        ranges.push(BarRange {
            addr: device_addr,
            size: config.size(),
            // NOTE(review): reported as non-prefetchable even though the
            // allocation above used config.is_prefetchable() — confirm this
            // asymmetry is intentional.
            prefetchable: false,
        });
    }

    // NOTE(review): assumes ranges[0] is the shmem BAR; that holds when the
    // device has no custom BARs (the synthesized-config path above), but
    // should be confirmed for devices returning both custom BARs and a
    // shared memory region.
    if virtio_pci_device
        .device
        .get_shared_memory_region()
        .is_some()
    {
        let shmem_region = AddressRange::from_start_and_size(ranges[0].addr, ranges[0].size)
            .expect("invalid shmem region");
        virtio_pci_device
            .device
            .set_shared_memory_region(shmem_region);
    }

    Ok(ranges)
}
1156 
#[cfg(feature = "pci-hotplug")]
impl HotPluggable for VirtioPciDevice {
    /// Sets PciAddress to pci_addr
    fn set_pci_address(&mut self, pci_addr: PciAddress) -> std::result::Result<(), PciDeviceError> {
        self.pci_address = Some(pci_addr);
        Ok(())
    }

    /// Configures IO BAR layout without memory alloc.
    fn configure_io_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // A placeholder allocator starting at 0 is enough to describe the
        // layout; real addresses come from guest BAR reprogramming.
        let mut layout = SimpleAllocator::new(0);
        allocate_io_bars(self, |size, _alloc, _options| layout.alloc(size, size))?;
        Ok(())
    }

    /// Configure device BAR layout without memory alloc.
    fn configure_device_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // For device BAR, the space for CAPABILITY_BAR_SIZE should be skipped.
        let mut layout = SimpleAllocator::new(CAPABILITY_BAR_SIZE);
        allocate_device_bars(self, |size, _alloc, _options| layout.alloc(size, size))?;
        Ok(())
    }
}
1178 
#[cfg(feature = "pci-hotplug")]
/// A simple allocator that can allocate non-overlapping aligned intervals.
///
/// The addresses allocated are not exclusively reserved for the device, and cannot be used for a
/// static device. The allocated placeholder address describes the layout of PCI BAR for hotplugged
/// devices. Actual memory allocation is handled by PCI BAR reprogramming initiated by guest OS.
struct SimpleAllocator {
    /// Next candidate address; advanced (and rounded up for alignment) by
    /// each `alloc` call.
    current_address: u64,
}
1188 
#[cfg(feature = "pci-hotplug")]
impl SimpleAllocator {
    /// Constructs SimpleAllocator. Address will start at or after base_address.
    fn new(base_address: u64) -> Self {
        SimpleAllocator {
            current_address: base_address,
        }
    }

    /// Allocate memory with size and align. Returns the start of address.
    fn alloc(&mut self, size: u64, align: u64) -> std::result::Result<u64, PciDeviceError> {
        // Round the cursor up to the requested alignment; align == 0 means
        // no alignment requirement.
        if align > 0 {
            self.current_address = self.current_address.next_multiple_of(align);
        }
        let start = self.current_address;
        self.current_address = start + size;
        Ok(start)
    }
}
1209 
1210 impl Suspendable for VirtioPciDevice {
sleep(&mut self) -> anyhow::Result<()>1211     fn sleep(&mut self) -> anyhow::Result<()> {
1212         // If the device is already asleep, we should not request it to sleep again.
1213         if self.sleep_state.is_some() {
1214             return Ok(());
1215         }
1216 
1217         if let Some(queues) = self.device.virtio_sleep()? {
1218             anyhow::ensure!(
1219                 self.device_activated,
1220                 format!(
1221                     "unactivated device {} returned queues on sleep",
1222                     self.debug_label()
1223                 ),
1224             );
1225             self.sleep_state = Some(SleepState::Active {
1226                 activated_queues: queues,
1227             });
1228         } else {
1229             anyhow::ensure!(
1230                 !self.device_activated,
1231                 format!(
1232                     "activated device {} didn't return queues on sleep",
1233                     self.debug_label()
1234                 ),
1235             );
1236             self.sleep_state = Some(SleepState::Inactive);
1237         }
1238         Ok(())
1239     }
1240 
wake(&mut self) -> anyhow::Result<()>1241     fn wake(&mut self) -> anyhow::Result<()> {
1242         match self.sleep_state.take() {
1243             None => {
1244                 // If the device is already awake, we should not request it to wake again.
1245             }
1246             Some(SleepState::Inactive) => {
1247                 self.device.virtio_wake(None).with_context(|| {
1248                     format!(
1249                         "virtio_wake failed for {}, can't recover",
1250                         self.debug_label(),
1251                     )
1252                 })?;
1253             }
1254             Some(SleepState::Active { activated_queues }) => {
1255                 self.device
1256                     .virtio_wake(Some((
1257                         self.mem.clone(),
1258                         self.interrupt
1259                             .clone()
1260                             .expect("interrupt missing for already active queues"),
1261                         activated_queues,
1262                     )))
1263                     .with_context(|| {
1264                         format!(
1265                             "virtio_wake failed for {}, can't recover",
1266                             self.debug_label(),
1267                         )
1268                     })?;
1269             }
1270         };
1271         Ok(())
1272     }
1273 
    /// Serializes the device state (config space, MSI-X, common config,
    /// queues, and the inner device) into an `AnySnapshot`.
    ///
    /// The device must be asleep (`sleep_state` set); snapshotting with an
    /// attached iommu is not supported.
    fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        if self.iommu.is_some() {
            return Err(anyhow!("Cannot snapshot if iommu is present."));
        }

        AnySnapshot::to_any(VirtioPciDeviceSnapshot {
            config_regs: self.config_regs.snapshot()?,
            inner_device: self.device.virtio_snapshot()?,
            device_activated: self.device_activated,
            interrupt: self.interrupt.as_ref().map(|i| i.snapshot()),
            msix_config: self.msix_config.lock().snapshot()?,
            common_config: self.common_config,
            queues: self
                .queues
                .iter()
                .map(|q| q.snapshot())
                .collect::<anyhow::Result<Vec<_>>>()?,
            // The queues captured by sleep() for an active device, if any.
            activated_queues: match &self.sleep_state {
                None => {
                    anyhow::bail!("tried snapshotting while awake")
                }
                Some(SleepState::Inactive) => None,
                Some(SleepState::Active { activated_queues }) => {
                    let mut serialized_queues = Vec::new();
                    for (index, queue) in activated_queues.iter() {
                        serialized_queues.push((*index, queue.snapshot()?));
                    }
                    Some(serialized_queues)
                }
            },
        })
        .context("failed to serialize VirtioPciDeviceSnapshot")
    }
1307 
    /// Restores device state from a snapshot produced by `snapshot()`.
    ///
    /// The device must currently be asleep and inactive; restore order
    /// matters (config regs, MSI-X, interrupt, queues, sleep state, ioevent
    /// registration, then the inner device).
    fn restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
        // Restoring from an activated state is more complex and low priority, so just fail for
        // now. We'll need to reset the device before restoring, e.g. must call
        // self.unregister_ioevents().
        anyhow::ensure!(
            !self.device_activated,
            "tried to restore after virtio device activated. not supported yet"
        );

        let deser: VirtioPciDeviceSnapshot = AnySnapshot::from_any(data)?;

        self.config_regs.restore(deser.config_regs)?;
        self.device_activated = deser.device_activated;

        self.msix_config.lock().restore(deser.msix_config)?;
        self.common_config = deser.common_config;

        // Restore the interrupt. This must be done after restoring the MSI-X configuration, but
        // before restoring the queues.
        if let Some(deser_interrupt) = deser.interrupt {
            let interrupt = Interrupt::new_from_snapshot(
                self.interrupt_evt
                    .as_ref()
                    .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                    .try_clone()
                    .with_context(|| {
                        format!("{} failed to clone interrupt_evt", self.debug_label())
                    })?,
                Some(self.msix_config.clone()),
                self.common_config.msix_config,
                deser_interrupt,
                #[cfg(target_arch = "x86_64")]
                Some((
                    PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                    MetricEventType::VirtioWakeup {
                        virtio_id: self.device.device_type() as u32,
                    },
                )),
            );
            self.interrupt_resample_worker = interrupt.spawn_resample_thread();
            self.interrupt = Some(interrupt);
        }

        // Queue configs are restored in place; the count is a structural
        // invariant of the device type.
        assert_eq!(
            self.queues.len(),
            deser.queues.len(),
            "device must have the same number of queues"
        );
        for (q, s) in self.queues.iter_mut().zip(deser.queues.into_iter()) {
            q.restore(s)?;
        }

        // Verify we are asleep and inactive.
        match &self.sleep_state {
            None => {
                anyhow::bail!("tried restoring while awake")
            }
            Some(SleepState::Inactive) => {}
            Some(SleepState::Active { .. }) => {
                anyhow::bail!("tried to restore after virtio device activated. not supported yet")
            }
        };
        // Restore `sleep_state`.
        if let Some(activated_queues_snapshot) = deser.activated_queues {
            let interrupt = self
                .interrupt
                .as_ref()
                .context("tried to restore active queues without an interrupt")?;
            let mut activated_queues = BTreeMap::new();
            for (index, queue_snapshot) in activated_queues_snapshot {
                let queue_config = self
                    .queues
                    .get(index)
                    .with_context(|| format!("missing queue config for activated queue {index}"))?;
                let queue_evt = self
                    .queue_evts
                    .get(index)
                    .with_context(|| format!("missing queue event for activated queue {index}"))?
                    .event
                    .try_clone()
                    .context("failed to clone queue event")?;
                activated_queues.insert(
                    index,
                    Queue::restore(
                        queue_config,
                        queue_snapshot,
                        &self.mem,
                        queue_evt,
                        interrupt.clone(),
                    )?,
                );
            }

            // Restore the activated queues.
            self.sleep_state = Some(SleepState::Active { activated_queues });
        } else {
            self.sleep_state = Some(SleepState::Inactive);
        }

        // Call register_io_events for the activated queue events.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;
        self.queues
            .iter()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .try_for_each(|((queue_index, _queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                Ok::<(), anyhow::Error>(())
            })?;

        // There might be data in the queue that wasn't drained by the device
        // at the time it was snapshotted. In this case, the doorbell should
        // still be signaled. If it is not, the driver may never re-trigger the
        // doorbell, and the device will stall. So here, we explicitly signal
        // every doorbell. Spurious doorbells are safe (devices will check their
        // queue, realize nothing is there, and go back to sleep.)
        self.queue_evts.iter_mut().try_for_each(|queue_event| {
            queue_event
                .event
                .signal()
                .context("failed to wake doorbell")
        })?;

        self.device.virtio_restore(deser.inner_device)?;

        Ok(())
    }
1446 }
1447 
/// Forwards a virtio device's shared-memory mapping requests to the VM memory
/// subsystem through a `VmMemoryClient`.
struct VmRequester {
    vm_memory_client: VmMemoryClient,
    // The PCI BAR allocation that mappings are placed into.
    alloc: Alloc,
    // Maps BAR offset -> registered region id, so mappings can be removed.
    mappings: BTreeMap<u64, VmMemoryRegionId>,
    prepare_type: SharedMemoryPrepareType,
    // Whether the lazy one-time region preparation has already run.
    prepared: bool,
}
1455 
1456 impl VmRequester {
new( vm_memory_client: VmMemoryClient, alloc: Alloc, prepare_type: SharedMemoryPrepareType, ) -> Self1457     fn new(
1458         vm_memory_client: VmMemoryClient,
1459         alloc: Alloc,
1460         prepare_type: SharedMemoryPrepareType,
1461     ) -> Self {
1462         Self {
1463             vm_memory_client,
1464             alloc,
1465             mappings: BTreeMap::new(),
1466             prepare_type,
1467             prepared: false,
1468         }
1469     }
1470 }
1471 
1472 impl SharedMemoryMapper for VmRequester {
add_mapping( &mut self, source: VmMemorySource, offset: u64, prot: Protection, cache: MemCacheType, ) -> anyhow::Result<()>1473     fn add_mapping(
1474         &mut self,
1475         source: VmMemorySource,
1476         offset: u64,
1477         prot: Protection,
1478         cache: MemCacheType,
1479     ) -> anyhow::Result<()> {
1480         if !self.prepared {
1481             if let SharedMemoryPrepareType::SingleMappingOnFirst(prepare_cache_type) =
1482                 self.prepare_type
1483             {
1484                 debug!(
1485                     "lazy prepare_shared_memory_region with {:?}",
1486                     prepare_cache_type
1487                 );
1488                 self.vm_memory_client
1489                     .prepare_shared_memory_region(self.alloc, prepare_cache_type)
1490                     .context("lazy prepare_shared_memory_region failed")?;
1491             }
1492             self.prepared = true;
1493         }
1494 
1495         // devices must implement VirtioDevice::get_shared_memory_prepare_type(), returning
1496         // SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheNonCoherent) in order to
1497         // add any mapping that requests MemCacheType::CacheNonCoherent.
1498         if cache == MemCacheType::CacheNonCoherent {
1499             if let SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheCoherent) =
1500                 self.prepare_type
1501             {
1502                 error!("invalid request to map with CacheNonCoherent for device with prepared CacheCoherent memory");
1503                 return Err(anyhow!("invalid MemCacheType"));
1504             }
1505         }
1506 
1507         let id = self
1508             .vm_memory_client
1509             .register_memory(
1510                 source,
1511                 VmMemoryDestination::ExistingAllocation {
1512                     allocation: self.alloc,
1513                     offset,
1514                 },
1515                 prot,
1516                 cache,
1517             )
1518             .context("register_memory failed")?;
1519 
1520         self.mappings.insert(offset, id);
1521         Ok(())
1522     }
1523 
remove_mapping(&mut self, offset: u64) -> anyhow::Result<()>1524     fn remove_mapping(&mut self, offset: u64) -> anyhow::Result<()> {
1525         let id = self.mappings.remove(&offset).context("invalid offset")?;
1526         self.vm_memory_client
1527             .unregister_memory(id)
1528             .context("unregister_memory failed")
1529     }
1530 
as_raw_descriptor(&self) -> Option<RawDescriptor>1531     fn as_raw_descriptor(&self) -> Option<RawDescriptor> {
1532         Some(self.vm_memory_client.as_raw_descriptor())
1533     }
1534 }
1535 
#[cfg(test)]
mod tests {

    #[cfg(feature = "pci-hotplug")]
    #[test]
    fn allocate_aligned_address() {
        let mut allocator = super::SimpleAllocator::new(0);
        // Base 0 already satisfies 0x80 alignment; occupies [0, 0x20).
        assert_eq!(allocator.alloc(0x20, 0x80).unwrap(), 0);
        // Next free offset 0x20 rounds up to 0x40; occupies [0x40, 0x80).
        assert_eq!(allocator.alloc(0x40, 0x40).unwrap(), 0x40);
        // Next free offset 0x80 is already 0x80-aligned; occupies [0x80, 0x108).
        assert_eq!(allocator.alloc(0x88, 0x80).unwrap(), 0x80);
        // Next free offset 0x108 rounds up to 0x180; occupies [0x180, 0x1b0).
        assert_eq!(allocator.alloc(0x30, 0x80).unwrap(), 0x180);
    }
}
1553