• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::BTreeMap;
6 use std::sync::Arc;
7 
8 #[cfg(target_arch = "x86_64")]
9 use acpi_tables::sdt::SDT;
10 use anyhow::anyhow;
11 use anyhow::Context;
12 use base::error;
13 use base::trace;
14 use base::AsRawDescriptor;
15 use base::AsRawDescriptors;
16 use base::Event;
17 use base::Protection;
18 use base::RawDescriptor;
19 use base::Result;
20 use base::SharedMemory;
21 use base::Tube;
22 use data_model::Le32;
23 use hypervisor::Datamatch;
24 use hypervisor::MemCacheType;
25 use libc::ERANGE;
26 #[cfg(target_arch = "x86_64")]
27 use metrics::MetricEventType;
28 use resources::Alloc;
29 use resources::AllocOptions;
30 use resources::SystemAllocator;
31 use serde::Deserialize;
32 use serde::Serialize;
33 use sync::Mutex;
34 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE;
35 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER;
36 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER_OK;
37 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FAILED;
38 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FEATURES_OK;
39 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_NEEDS_RESET;
40 use vm_control::api::VmMemoryClient;
41 use vm_control::VmMemoryDestination;
42 use vm_control::VmMemoryRegionId;
43 use vm_control::VmMemorySource;
44 use vm_memory::GuestAddress;
45 use vm_memory::GuestMemory;
46 use zerocopy::AsBytes;
47 use zerocopy::FromBytes;
48 use zerocopy::FromZeroes;
49 
50 use self::virtio_pci_common_config::VirtioPciCommonConfig;
51 use super::*;
52 #[cfg(target_arch = "x86_64")]
53 use crate::acpi::PmWakeupEvent;
54 #[cfg(target_arch = "x86_64")]
55 use crate::pci::pm::PciDevicePower;
56 use crate::pci::pm::PciPmCap;
57 use crate::pci::pm::PmConfig;
58 use crate::pci::pm::PmStatusChange;
59 use crate::pci::BarRange;
60 use crate::pci::MsixCap;
61 use crate::pci::MsixConfig;
62 use crate::pci::MsixStatus;
63 use crate::pci::PciAddress;
64 use crate::pci::PciBarConfiguration;
65 use crate::pci::PciBarIndex;
66 use crate::pci::PciBarPrefetchable;
67 use crate::pci::PciBarRegionType;
68 use crate::pci::PciCapability;
69 use crate::pci::PciCapabilityID;
70 use crate::pci::PciClassCode;
71 use crate::pci::PciConfiguration;
72 use crate::pci::PciDevice;
73 use crate::pci::PciDeviceError;
74 use crate::pci::PciDisplaySubclass;
75 use crate::pci::PciHeaderType;
76 use crate::pci::PciId;
77 use crate::pci::PciInterruptPin;
78 use crate::pci::PciMassStorageSubclass;
79 use crate::pci::PciSubclass;
80 use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
81 #[cfg(feature = "pci-hotplug")]
82 use crate::HotPluggable;
83 use crate::IrqLevelEvent;
84 use crate::Suspendable;
85 
/// Virtio PCI capability `cfg_type` values identifying which configuration
/// structure a vendor-specific capability points at.
#[repr(u8)]
#[derive(Debug, Copy, Clone, enumn::N)]
pub enum PciCapabilityType {
    /// Common configuration structure.
    CommonConfig = 1,
    /// Queue notification area.
    NotifyConfig = 2,
    /// ISR status byte.
    IsrConfig = 3,
    /// Device-specific configuration.
    DeviceConfig = 4,
    /// PCI configuration access capability.
    PciConfig = 5,
    // Doorbell, Notification and SharedMemory are Virtio Vhost User related PCI
    // capabilities. Specified in 5.7.7.4 here
    // https://stefanha.github.io/virtio/vhost-user-slave.html#x1-2830007.
    DoorbellConfig = 6,
    NotificationConfig = 7,
    SharedMemoryConfig = 8,
}
101 
/// Vendor-specific PCI capability header locating a virtio configuration
/// structure: which BAR it lives in and at what offset/length.
/// `repr(C)` so the bytes can be written directly into PCI config space.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromZeroes, FromBytes, AsBytes)]
pub struct VirtioPciCap {
    // cap_vndr and cap_next are autofilled based on id() in pci configuration
    pub cap_vndr: u8, // Generic PCI field: PCI_CAP_ID_VNDR
    pub cap_next: u8, // Generic PCI field: next ptr
    pub cap_len: u8,  // Generic PCI field: capability length
    pub cfg_type: u8, // Identifies the structure.
    pub bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type
    padding: [u8; 2], // Pad to full dword.
    pub offset: Le32, // Offset within bar.
    pub length: Le32, // Length of the structure, in bytes.
}
117 
118 impl PciCapability for VirtioPciCap {
bytes(&self) -> &[u8]119     fn bytes(&self) -> &[u8] {
120         self.as_bytes()
121     }
122 
id(&self) -> PciCapabilityID123     fn id(&self) -> PciCapabilityID {
124         PciCapabilityID::VendorSpecific
125     }
126 
writable_bits(&self) -> Vec<u32>127     fn writable_bits(&self) -> Vec<u32> {
128         vec![0u32; 4]
129     }
130 }
131 
132 impl VirtioPciCap {
new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self133     pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self {
134         VirtioPciCap {
135             cap_vndr: 0,
136             cap_next: 0,
137             cap_len: std::mem::size_of::<VirtioPciCap>() as u8,
138             cfg_type: cfg_type as u8,
139             bar,
140             id: 0,
141             padding: [0; 2],
142             offset: Le32::from(offset),
143             length: Le32::from(length),
144         }
145     }
146 
set_cap_len(&mut self, cap_len: u8)147     pub fn set_cap_len(&mut self, cap_len: u8) {
148         self.cap_len = cap_len;
149     }
150 }
151 
/// Virtio notification capability: the common capability header plus the
/// multiplier used to compute per-queue notification addresses.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
pub struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    // Multiplied by a queue's notify offset to locate its doorbell.
    notify_off_multiplier: Le32,
}
159 
160 impl PciCapability for VirtioPciNotifyCap {
bytes(&self) -> &[u8]161     fn bytes(&self) -> &[u8] {
162         self.as_bytes()
163     }
164 
id(&self) -> PciCapabilityID165     fn id(&self) -> PciCapabilityID {
166         PciCapabilityID::VendorSpecific
167     }
168 
writable_bits(&self) -> Vec<u32>169     fn writable_bits(&self) -> Vec<u32> {
170         vec![0u32; 5]
171     }
172 }
173 
174 impl VirtioPciNotifyCap {
new( cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32, multiplier: Le32, ) -> Self175     pub fn new(
176         cfg_type: PciCapabilityType,
177         bar: u8,
178         offset: u32,
179         length: u32,
180         multiplier: Le32,
181     ) -> Self {
182         VirtioPciNotifyCap {
183             cap: VirtioPciCap {
184                 cap_vndr: 0,
185                 cap_next: 0,
186                 cap_len: std::mem::size_of::<VirtioPciNotifyCap>() as u8,
187                 cfg_type: cfg_type as u8,
188                 bar,
189                 id: 0,
190                 padding: [0; 2],
191                 offset: Le32::from(offset),
192                 length: Le32::from(length),
193             },
194             notify_off_multiplier: multiplier,
195         }
196     }
197 }
198 
/// Virtio shared memory capability: the common header (carrying the low 32
/// bits of offset/length and the shmid) plus the high 32 bits of each.
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
pub struct VirtioPciShmCap {
    cap: VirtioPciCap,
    offset_hi: Le32, // Most sig 32 bits of offset
    length_hi: Le32, // Most sig 32 bits of length
}
206 
207 impl PciCapability for VirtioPciShmCap {
bytes(&self) -> &[u8]208     fn bytes(&self) -> &[u8] {
209         self.as_bytes()
210     }
211 
id(&self) -> PciCapabilityID212     fn id(&self) -> PciCapabilityID {
213         PciCapabilityID::VendorSpecific
214     }
215 
writable_bits(&self) -> Vec<u32>216     fn writable_bits(&self) -> Vec<u32> {
217         vec![0u32; 6]
218     }
219 }
220 
221 impl VirtioPciShmCap {
new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self222     pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self {
223         VirtioPciShmCap {
224             cap: VirtioPciCap {
225                 cap_vndr: 0,
226                 cap_next: 0,
227                 cap_len: std::mem::size_of::<VirtioPciShmCap>() as u8,
228                 cfg_type: cfg_type as u8,
229                 bar,
230                 id: shmid,
231                 padding: [0; 2],
232                 offset: Le32::from(offset as u32),
233                 length: Le32::from(length as u32),
234             },
235             offset_hi: Le32::from((offset >> 32) as u32),
236             length_hi: Le32::from((length >> 32) as u32),
237         }
238     }
239 }
240 
/// Subclasses for virtio.
#[allow(dead_code)]
#[derive(Copy, Clone)]
pub enum PciVirtioSubclass {
    // Non-transitional (modern-only) virtio device marker subclass.
    NonTransitionalBase = 0xff,
}
247 
248 impl PciSubclass for PciVirtioSubclass {
get_register_value(&self) -> u8249     fn get_register_value(&self) -> u8 {
250         *self as u8
251     }
252 }
253 
// Allocate one bar for the structs pointed to by the capability structures.
// Each region below is a fixed window within that BAR; *_LAST is the
// inclusive end offset of the corresponding window.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const COMMON_CONFIG_LAST: u64 = COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE - 1;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x1000;
const ISR_CONFIG_SIZE: u64 = 1;
const ISR_CONFIG_LAST: u64 = ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE - 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const DEVICE_CONFIG_LAST: u64 = DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE - 1;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const NOTIFICATION_LAST: u64 = NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE - 1;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_TABLE_LAST: u64 = MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE - 1;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const MSIX_PBA_LAST: u64 = MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE - 1;
const CAPABILITY_BAR_SIZE: u64 = 0x8000;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

// Red Hat/Qumranet vendor ID used by all virtio PCI devices.
const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
const VIRTIO_PCI_REVISION_ID: u8 = 1;

// BAR indices: settings/capability structures live in BAR 0, shared memory
// regions (when present) in BAR 2.
const CAPABILITIES_BAR_NUM: usize = 0;
const SHMEM_BAR_NUM: usize = 2;
283 
/// Per-queue notification event plus whether it is currently registered as
/// an ioevent with the hypervisor.
struct QueueEvent {
    event: Event,
    ioevent_registered: bool,
}
288 
/// Implements the
/// [PCI](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-650001)
/// transport for virtio devices.
pub struct VirtioPciDevice {
    // PCI configuration space registers, including BARs and capabilities.
    config_regs: PciConfiguration,
    // Address requested by the inner device, honored in allocate_address.
    preferred_address: Option<PciAddress>,
    // Address actually assigned; None until allocate_address succeeds.
    pci_address: Option<PciAddress>,

    // The wrapped virtio device implementation.
    device: Box<dyn VirtioDevice>,
    device_activated: bool,
    // When true, the legacy INTx pin/line is not programmed in assign_irq.
    disable_intx: bool,

    interrupt: Option<Interrupt>,
    interrupt_evt: Option<IrqLevelEvent>,
    // One QueueConfig per queue, written by the guest via common config.
    queues: Vec<QueueConfig>,
    // One notification event per queue, paired with `queues` by index.
    queue_evts: Vec<QueueEvent>,
    mem: GuestMemory,
    // Index of the BAR holding the capability/settings windows.
    settings_bar: PciBarIndex,
    msix_config: Arc<Mutex<MsixConfig>>,
    pm_config: Arc<Mutex<PmConfig>>,
    common_config: VirtioPciCommonConfig,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // API client that is present if the device has shared memory regions, and
    // is used to map/unmap files into the shared memory region.
    shared_memory_vm_memory_client: Option<VmMemoryClient>,

    // API client for registration of ioevents when PCI BAR reprogramming is detected.
    ioevent_vm_memory_client: VmMemoryClient,

    // State only present while asleep.
    sleep_state: Option<SleepState>,

    vm_control_tube: Arc<Mutex<Tube>>,
}
325 
/// Snapshot of the device's run state captured when it is put to sleep.
enum SleepState {
    // Asleep and device hasn't been activated yet by the guest.
    Inactive,
    // Asleep and device has been activated by the guest.
    Active {
        /// The queues returned from `VirtioDevice::virtio_sleep`.
        /// Map is from queue index -> Queue.
        activated_queues: BTreeMap<usize, Queue>,
    },
}
336 
/// Serialized form of a `VirtioPciDevice` used for snapshot/restore.
/// Nested components serialize themselves into `serde_json::Value`s.
#[derive(Serialize, Deserialize)]
struct VirtioPciDeviceSnapshot {
    config_regs: serde_json::Value,

    // Snapshot produced by the wrapped VirtioDevice itself.
    inner_device: serde_json::Value,
    device_activated: bool,

    interrupt: Option<InterruptSnapshot>,
    msix_config: serde_json::Value,
    common_config: VirtioPciCommonConfig,

    // Guest-written queue configuration, one entry per queue.
    queues: Vec<serde_json::Value>,
    // Present only if the device was activated: (queue index, queue state).
    activated_queues: Option<Vec<(usize, serde_json::Value)>>,
}
351 
impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    ///
    /// `msi_device_tube` backs the MSI-X configuration; `disable_intx`
    /// suppresses legacy interrupt wiring; `shared_memory_vm_memory_client`
    /// must be `Some` exactly when the device exposes a shared memory
    /// region; `ioevent_vm_memory_client` registers queue-doorbell ioevents.
    pub fn new(
        mem: GuestMemory,
        device: Box<dyn VirtioDevice>,
        msi_device_tube: Tube,
        disable_intx: bool,
        shared_memory_vm_memory_client: Option<VmMemoryClient>,
        ioevent_vm_memory_client: VmMemoryClient,
        vm_control_tube: Tube,
    ) -> Result<Self> {
        // shared_memory_vm_memory_client is required if there are shared memory regions.
        assert_eq!(
            device.get_shared_memory_region().is_none(),
            shared_memory_vm_memory_client.is_none()
        );

        // One notification Event per queue; none are ioevent-registered yet.
        let mut queue_evts = Vec::new();
        for _ in device.queue_max_sizes() {
            queue_evts.push(QueueEvent {
                event: Event::new()?,
                ioevent_registered: false,
            });
        }
        let queues = device
            .queue_max_sizes()
            .iter()
            .map(|&s| QueueConfig::new(s, device.features()))
            .collect();

        // Virtio PCI device IDs are the fixed base plus the device type.
        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

        // Use a matching PCI class where one exists; otherwise fall back to
        // the generic class with the virtio non-transitional subclass.
        let (pci_device_class, pci_device_subclass) = match device.device_type() {
            DeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Gpu => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            _ => (
                PciClassCode::TooOld,
                &PciVirtioSubclass::NonTransitionalBase as &dyn PciSubclass,
            ),
        };

        let num_interrupts = device.num_interrupts();

        // One MSI-X vector per queue plus one for configuration changes.
        let msix_num = u16::try_from(num_interrupts + 1).map_err(|_| base::Error::new(ERANGE))?;
        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_num,
            msi_device_tube,
            PciId::new(VIRTIO_PCI_VENDOR_ID, pci_device_id).into(),
            device.debug_label(),
        )));

        let config_regs = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            pci_device_class,
            pci_device_subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            VIRTIO_PCI_REVISION_ID,
        );

        Ok(VirtioPciDevice {
            config_regs,
            preferred_address: device.pci_address(),
            pci_address: None,
            device,
            device_activated: false,
            disable_intx,
            interrupt: None,
            interrupt_evt: None,
            queues,
            queue_evts,
            mem,
            settings_bar: 0,
            msix_config,
            pm_config: Arc::new(Mutex::new(PmConfig::new(true))),
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: VIRTIO_MSI_NO_VECTOR,
            },
            iommu: None,
            shared_memory_vm_memory_client,
            ioevent_vm_memory_client,
            sleep_state: None,
            vm_control_tube: Arc::new(Mutex::new(vm_control_tube)),
        })
    }

    /// True once the driver has set ACKNOWLEDGE, DRIVER, DRIVER_OK and
    /// FEATURES_OK in the status register without also setting FAILED.
    fn is_driver_ready(&self) -> bool {
        let ready_bits = (VIRTIO_CONFIG_S_ACKNOWLEDGE
            | VIRTIO_CONFIG_S_DRIVER
            | VIRTIO_CONFIG_S_DRIVER_OK
            | VIRTIO_CONFIG_S_FEATURES_OK) as u8;
        (self.common_config.driver_status & ready_bits) == ready_bits
            && self.common_config.driver_status & VIRTIO_CONFIG_S_FAILED as u8 == 0
    }

    /// Determines if the driver has requested the device reset itself
    /// (status register written back to the reset value).
    fn is_reset_requested(&self) -> bool {
        self.common_config.driver_status == DEVICE_RESET as u8
    }

    /// Adds the virtio transport capabilities (common config, ISR, device
    /// config, notification, PCI config access), the MSI-X capability and
    /// the power-management capability, all pointing into `settings_bar`.
    fn add_settings_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&common_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&isr_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&device_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.config_regs
            .add_capability(&notify_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        //TODO(dgreid) - How will the configuration_cap work?
        let configuration_cap = VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0);
        self.config_regs
            .add_capability(&configuration_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // MSI-X table and PBA share the settings BAR at fixed offsets.
        let msix_cap = MsixCap::new(
            settings_bar,
            self.msix_config.lock().num_vectors(),
            MSIX_TABLE_BAR_OFFSET as u32,
            settings_bar,
            MSIX_PBA_BAR_OFFSET as u32,
        );
        self.config_regs
            .add_capability(&msix_cap, Some(Box::new(self.msix_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.config_regs
            .add_capability(&PciPmCap::new(), Some(Box::new(self.pm_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.settings_bar = settings_bar as PciBarIndex;
        Ok(())
    }

    /// Activates the underlying `VirtioDevice`. `assign_irq` has to be called first.
    fn activate(&mut self) -> anyhow::Result<()> {
        let interrupt = Interrupt::new(
            self.interrupt_evt
                .as_ref()
                .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                .try_clone()
                .with_context(|| format!("{} failed to clone interrupt_evt", self.debug_label()))?,
            Some(self.msix_config.clone()),
            self.common_config.msix_config,
            #[cfg(target_arch = "x86_64")]
            Some(PmWakeupEvent::new(
                self.vm_control_tube.clone(),
                self.pm_config.clone(),
                MetricEventType::VirtioWakeup {
                    virtio_id: self.device.device_type() as u32,
                },
            )),
        );
        self.interrupt = Some(interrupt.clone());

        // Queue doorbells live in the settings BAR at NOTIFICATION_BAR_OFFSET,
        // one NOTIFY_OFF_MULTIPLIER-sized slot per queue index.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        // Use ready queues and their events.
        let queues = self
            .queues
            .iter_mut()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .map(|((queue_index, queue), evt)| {
                // Register an ioevent at the queue's doorbell address so a
                // guest write there signals evt.event directly; skip queues
                // whose event is already registered (e.g. after re-activation).
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                let queue_evt = evt.event.try_clone().context("failed to clone queue_evt")?;
                Ok((
                    queue_index,
                    queue
                        .activate(&self.mem, queue_evt)
                        .context("failed to activate queue")?,
                ))
            })
            .collect::<anyhow::Result<BTreeMap<usize, Queue>>>()?;

        // Device activation failure is reported to the guest via the
        // NEEDS_RESET status bit rather than propagated as an error here.
        if let Err(e) = self.device.activate(self.mem.clone(), interrupt, queues) {
            error!("{} activate failed: {:#}", self.debug_label(), e);
            self.common_config.driver_status |= VIRTIO_CONFIG_S_NEEDS_RESET as u8;
        } else {
            self.device_activated = true;
        }

        Ok(())
    }

    /// Unregisters every queue doorbell ioevent previously registered in
    /// `activate`, leaving the events available for later re-registration.
    fn unregister_ioevents(&mut self) -> anyhow::Result<()> {
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        for (queue_index, evt) in self.queue_evts.iter_mut().enumerate() {
            if evt.ioevent_registered {
                self.ioevent_vm_memory_client
                    .unregister_io_event(
                        evt.event.try_clone().context("failed to clone Event")?,
                        notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                        Datamatch::AnyLength,
                    )
                    .context("failed to unregister ioevent")?;
                evt.ioevent_registered = false;
            }
        }
        Ok(())
    }

    /// Shared reference to the wrapped virtio device.
    pub fn virtio_device(&self) -> &dyn VirtioDevice {
        self.device.as_ref()
    }

    /// The PCI address assigned by `allocate_address`, if any.
    pub fn pci_address(&self) -> Option<PciAddress> {
        self.pci_address
    }

    /// On a power-state transition, arms the interrupt's wakeup event while
    /// the device is in D3 (and disarms it otherwise).
    #[cfg(target_arch = "x86_64")]
    fn handle_pm_status_change(&mut self, status: &PmStatusChange) {
        if let Some(interrupt) = self.interrupt.as_mut() {
            interrupt.set_wakeup_event_active(status.to == PciDevicePower::D3)
        }
    }

    /// Power-state transitions are a no-op on non-x86_64 targets.
    #[cfg(not(target_arch = "x86_64"))]
    fn handle_pm_status_change(&mut self, _status: &PmStatusChange) {}
}
638 
639 impl PciDevice for VirtioPciDevice {
debug_label(&self) -> String640     fn debug_label(&self) -> String {
641         format!("pci{}", self.device.debug_label())
642     }
643 
    /// PCI address requested by the inner device, if any.
    fn preferred_address(&self) -> Option<PciAddress> {
        self.preferred_address
    }
647 
    /// Reserves the preferred PCI address, or allocates any free one.
    /// Idempotent: if an address was already assigned it is returned as-is.
    fn allocate_address(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<PciAddress, PciDeviceError> {
        if self.pci_address.is_none() {
            if let Some(address) = self.preferred_address {
                // Claim exactly the slot the inner device asked for; failure
                // to reserve it is an error rather than a fallback.
                if !resources.reserve_pci(
                    Alloc::PciBar {
                        bus: address.bus,
                        dev: address.dev,
                        func: address.func,
                        bar: 0,
                    },
                    self.debug_label(),
                ) {
                    return Err(PciDeviceError::PciAllocationFailed);
                }
                self.pci_address = Some(address);
            } else {
                // No preference: take whatever the allocator hands out.
                self.pci_address = match resources.allocate_pci(0, self.debug_label()) {
                    Some(Alloc::PciBar {
                        bus,
                        dev,
                        func,
                        bar: _,
                    }) => Some(PciAddress { bus, dev, func }),
                    _ => None,
                }
            }
        }
        self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
    }
680 
keep_rds(&self) -> Vec<RawDescriptor>681     fn keep_rds(&self) -> Vec<RawDescriptor> {
682         let mut rds = self.device.keep_rds();
683         rds.extend(
684             self.queue_evts
685                 .iter()
686                 .map(|qe| qe.event.as_raw_descriptor()),
687         );
688         if let Some(interrupt_evt) = &self.interrupt_evt {
689             rds.extend(interrupt_evt.as_raw_descriptors());
690         }
691         let descriptor = self.msix_config.lock().get_msi_socket();
692         rds.push(descriptor);
693         if let Some(iommu) = &self.iommu {
694             rds.append(&mut iommu.lock().as_raw_descriptors());
695         }
696         rds.push(self.ioevent_vm_memory_client.as_raw_descriptor());
697         rds.push(self.vm_control_tube.lock().as_raw_descriptor());
698         rds
699     }
700 
    /// Stores the INTx level event; programs the legacy interrupt pin and
    /// line into config space unless INTx is disabled for this device.
    fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
        self.interrupt_evt = Some(irq_evt);
        if !self.disable_intx {
            self.config_regs.set_irq(irq_num as u8, pin);
        }
    }
707 
    /// Delegates capability-BAR allocation to the module-level
    /// `allocate_io_bars` helper, supplying a closure that draws MMIO
    /// space from `resources` under a per-device-type label.
    fn allocate_io_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_io_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-cap_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }
727 
    /// Delegates device-specific BAR allocation (e.g. shared memory) to the
    /// module-level `allocate_device_bars` helper, supplying a closure that
    /// draws MMIO space from `resources` under a per-device-type label.
    fn allocate_device_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_device_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-custom_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }
747 
destroy_device(&mut self)748     fn destroy_device(&mut self) {
749         if let Err(e) = self.unregister_ioevents() {
750             error!("error destroying {}: {:?}", &self.debug_label(), &e);
751         }
752     }
753 
    /// Configuration of BAR `bar_num`, straight from the config registers.
    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
        self.config_regs.get_bar_configuration(bar_num)
    }
757 
register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError>758     fn register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> {
759         let mut caps = self.device.get_device_caps();
760         if let Some(region) = self.device.get_shared_memory_region() {
761             caps.push(Box::new(VirtioPciShmCap::new(
762                 PciCapabilityType::SharedMemoryConfig,
763                 SHMEM_BAR_NUM as u8,
764                 0,
765                 region.length,
766                 region.id,
767             )));
768         }
769 
770         for cap in caps {
771             self.config_regs
772                 .add_capability(&*cap, None)
773                 .map_err(PciDeviceError::CapabilitiesSetup)?;
774         }
775 
776         Ok(())
777     }
778 
    /// Reads one dword of PCI config space at register index `reg_idx`.
    fn read_config_register(&self, reg_idx: usize) -> u32 {
        self.config_regs.read_reg(reg_idx)
    }
782 
    /// Writes PCI config space, then forwards any side effect the write
    /// produced: MSI-X mask/unmask changes go to the device, and
    /// power-state transitions to the PM handler.
    fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
        if let Some(res) = self.config_regs.write_reg(reg_idx, offset, data) {
            if let Some(msix_behavior) = res.downcast_ref::<MsixStatus>() {
                self.device.control_notify(*msix_behavior);
            } else if let Some(status) = res.downcast_ref::<PmStatusChange>() {
                self.handle_pm_status_change(status);
            }
        }
    }
792 
setup_pci_config_mapping( &mut self, shmem: &SharedMemory, base: usize, len: usize, ) -> std::result::Result<bool, PciDeviceError>793     fn setup_pci_config_mapping(
794         &mut self,
795         shmem: &SharedMemory,
796         base: usize,
797         len: usize,
798     ) -> std::result::Result<bool, PciDeviceError> {
799         self.config_regs
800             .setup_mapping(shmem, base, len)
801             .map(|_| true)
802             .map_err(PciDeviceError::MmioSetup)
803     }
804 
read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8])805     fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
806         if bar_index == self.settings_bar {
807             match offset {
808                 COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.read(
809                     offset - COMMON_CONFIG_BAR_OFFSET,
810                     data,
811                     &mut self.queues,
812                     self.device.as_mut(),
813                 ),
814                 ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
815                     if let Some(v) = data.get_mut(0) {
816                         // Reading this register resets it to 0.
817                         *v = if let Some(interrupt) = &self.interrupt {
818                             interrupt.read_and_reset_interrupt_status()
819                         } else {
820                             0
821                         };
822                     }
823                 }
824                 DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
825                     self.device
826                         .read_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
827                 }
828                 NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
829                     // Handled with ioevents.
830                 }
831                 MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
832                     self.msix_config
833                         .lock()
834                         .read_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
835                 }
836                 MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
837                     self.msix_config
838                         .lock()
839                         .read_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
840                 }
841                 _ => (),
842             }
843         } else {
844             self.device.read_bar(bar_index, offset, data);
845         }
846     }
847 
    fn write_bar(&mut self, bar_index: usize, offset: u64, data: &[u8]) {
        // Writes to the settings BAR are decoded by offset range; writes to
        // any other BAR are forwarded to the underlying virtio device. After
        // the write, the device may need to be activated or reset (below).
        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.write(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    // The first written byte is a mask of status bits to clear.
                    if let Some(v) = data.first() {
                        if let Some(interrupt) = &self.interrupt {
                            interrupt.clear_interrupt_status_bits(*v);
                        }
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .write_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Notifications are normally handled with ioevents inside the hypervisor and
                    // do not reach write_bar(). However, if the ioevent registration hasn't
                    // finished yet, it is possible for a write to the notification region to make
                    // it through as a normal MMIO exit and end up here. To handle that case,
                    // provide a fallback that looks up the corresponding queue for the offset and
                    // triggers its event, which is equivalent to what the ioevent would do.
                    let queue_index = (offset - NOTIFICATION_BAR_OFFSET) as usize
                        / NOTIFY_OFF_MULTIPLIER as usize;
                    trace!("write_bar notification fallback for queue {}", queue_index);
                    if let Some(evt) = self.queue_evts.get(queue_index) {
                        let _ = evt.event.signal();
                    }
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    let behavior = self
                        .msix_config
                        .lock()
                        .write_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                    // The device must be told about MSI-X changes (e.g. masking).
                    self.device.control_notify(behavior);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .write_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        } else {
            self.device.write_bar(bar_index, offset, data);
        }

        // The write above may have completed the driver's setup handshake:
        // activate the not-yet-activated device once the driver reports ready.
        if !self.device_activated && self.is_driver_ready() {
            if let Err(e) = self.activate() {
                error!("failed to activate device: {:#}", e);
            }
        }

        // Device has been reset by the driver
        if self.device_activated && self.is_reset_requested() {
            if let Err(e) = self.device.reset() {
                error!("failed to reset {} device: {:#}", self.debug_label(), e);
            } else {
                // Only tear down transport state if the device reset succeeded.
                self.device_activated = false;
                // reset queues
                self.queues.iter_mut().for_each(QueueConfig::reset);
                // select queue 0 by default
                self.common_config.queue_select = 0;
                if let Err(e) = self.unregister_ioevents() {
                    error!("failed to unregister ioevents: {:#}", e);
                }
            }
        }
    }
922 
    fn on_device_sandboxed(&mut self) {
        // Forward the sandbox notification to the wrapped virtio device.
        self.device.on_device_sandboxed();
    }
926 
    #[cfg(target_arch = "x86_64")]
    fn generate_acpi(&mut self, sdts: Vec<SDT>) -> Option<Vec<SDT>> {
        // Delegate ACPI SDT generation to the wrapped virtio device, passing
        // along the (possibly unassigned) PCI address.
        self.device.generate_acpi(&self.pci_address, sdts)
    }
931 
    fn as_virtio_pci_device(&self) -> Option<&VirtioPciDevice> {
        // This is a VirtioPciDevice, so the downcast always succeeds.
        Some(self)
    }
935 }
936 
allocate_io_bars<F>( virtio_pci_device: &mut VirtioPciDevice, mut alloc_fn: F, ) -> std::result::Result<Vec<BarRange>, PciDeviceError> where F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,937 fn allocate_io_bars<F>(
938     virtio_pci_device: &mut VirtioPciDevice,
939     mut alloc_fn: F,
940 ) -> std::result::Result<Vec<BarRange>, PciDeviceError>
941 where
942     F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
943 {
944     let address = virtio_pci_device
945         .pci_address
946         .expect("allocate_address must be called prior to allocate_io_bars");
947     // Allocate one bar for the structures pointed to by the capability structures.
948     let settings_config_addr = alloc_fn(
949         CAPABILITY_BAR_SIZE,
950         Alloc::PciBar {
951             bus: address.bus,
952             dev: address.dev,
953             func: address.func,
954             bar: 0,
955         },
956         AllocOptions::new()
957             .max_address(u32::MAX.into())
958             .align(CAPABILITY_BAR_SIZE),
959     )?;
960     let config = PciBarConfiguration::new(
961         CAPABILITIES_BAR_NUM,
962         CAPABILITY_BAR_SIZE,
963         PciBarRegionType::Memory32BitRegion,
964         PciBarPrefetchable::NotPrefetchable,
965     )
966     .set_address(settings_config_addr);
967     let settings_bar = virtio_pci_device
968         .config_regs
969         .add_pci_bar(config)
970         .map_err(|e| PciDeviceError::IoRegistrationFailed(settings_config_addr, e))?
971         as u8;
972     // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
973     virtio_pci_device.add_settings_pci_capabilities(settings_bar)?;
974 
975     Ok(vec![BarRange {
976         addr: settings_config_addr,
977         size: CAPABILITY_BAR_SIZE,
978         prefetchable: false,
979     }])
980 }
981 
// Allocates the device-specific BARs: either the BARs reported by the device
// itself via get_device_bars(), or — when it reports none — a single 64-bit
// prefetchable BAR covering the device's shared memory region (if any).
fn allocate_device_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_device_bars");

    let configs = virtio_pci_device.device.get_device_bars(address);
    let configs = if !configs.is_empty() {
        configs
    } else {
        // No explicit device BARs: fall back to a shared memory BAR, if any.
        let region = match virtio_pci_device.device.get_shared_memory_region() {
            None => return Ok(Vec::new()),
            Some(r) => r,
        };
        // The BAR size is the region length rounded up to a power of two.
        let config = PciBarConfiguration::new(
            SHMEM_BAR_NUM,
            region
                .length
                .checked_next_power_of_two()
                .expect("bar too large"),
            PciBarRegionType::Memory64BitRegion,
            PciBarPrefetchable::Prefetchable,
        );

        let alloc = Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: config.bar_index() as u8,
        };

        // Hand the device a mapper so it can install mappings into this BAR.
        // Note this consumes shared_memory_vm_memory_client.
        virtio_pci_device
            .device
            .set_shared_memory_mapper(Box::new(VmRequester::new(
                virtio_pci_device
                    .shared_memory_vm_memory_client
                    .take()
                    .expect("missing shared_memory_tube"),
                alloc,
                // See comment VmMemoryRequest::execute
                !virtio_pci_device
                    .device
                    .expose_shmem_descriptors_with_viommu(),
            )));

        vec![config]
    };
    let mut ranges = vec![];
    for config in configs {
        let device_addr = alloc_fn(
            config.size(),
            Alloc::PciBar {
                bus: address.bus,
                dev: address.dev,
                func: address.func,
                bar: config.bar_index() as u8,
            },
            AllocOptions::new()
                .prefetchable(config.is_prefetchable())
                .align(config.size()),
        )?;
        let config = config.set_address(device_addr);
        let _device_bar = virtio_pci_device
            .config_regs
            .add_pci_bar(config)
            .map_err(|e| PciDeviceError::IoRegistrationFailed(device_addr, e))?;
        ranges.push(BarRange {
            addr: device_addr,
            size: config.size(),
            // NOTE(review): hard-coded to false even when
            // config.is_prefetchable() is true — confirm this is intentional.
            prefetchable: false,
        });
    }

    // NOTE(review): assumes the shared memory BAR is ranges[0]. That holds
    // when get_device_bars() returned nothing (only the shmem BAR exists);
    // confirm for devices that report both custom BARs and a shmem region.
    if virtio_pci_device
        .device
        .get_shared_memory_region()
        .is_some()
    {
        virtio_pci_device
            .device
            .set_shared_memory_region_base(GuestAddress(ranges[0].addr));
    }

    Ok(ranges)
}
1072 
#[cfg(feature = "pci-hotplug")]
impl HotPluggable for VirtioPciDevice {
    /// Sets PciAddress to pci_addr
    fn set_pci_address(&mut self, pci_addr: PciAddress) -> std::result::Result<(), PciDeviceError> {
        self.pci_address = Some(pci_addr);
        Ok(())
    }

    /// Configures IO BAR layout without memory alloc.
    fn configure_io_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // Placeholder addresses start at 0; real addresses come from guest
        // BAR reprogramming later.
        let mut allocator = SimpleAllocator::new(0);
        allocate_io_bars(self, |size, _, _| allocator.alloc(size, size))?;
        Ok(())
    }

    /// Configure device BAR layout without memory alloc.
    fn configure_device_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // Skip past the settings BAR region so device BARs do not overlap it.
        let mut allocator = SimpleAllocator::new(CAPABILITY_BAR_SIZE);
        allocate_device_bars(self, |size, _, _| allocator.alloc(size, size))?;
        Ok(())
    }
}
1094 
#[cfg(feature = "pci-hotplug")]
/// A simple allocator that can allocate non-overlapping aligned intervals.
///
/// The addresses allocated are not exclusively reserved for the device, and cannot be used for a
/// static device. The allocated placeholder address describes the layout of PCI BAR for hotplugged
/// devices. Actual memory allocation is handled by PCI BAR reprogramming initiated by guest OS.
struct SimpleAllocator {
    /// The next candidate address; advanced (and aligned) by each `alloc` call.
    current_address: u64,
}
1104 
#[cfg(feature = "pci-hotplug")]
impl SimpleAllocator {
    /// Constructs SimpleAllocator. Address will start at or after base_address.
    fn new(base_address: u64) -> Self {
        Self {
            current_address: base_address,
        }
    }

    /// Allocate memory with size and align. Returns the start of address.
    fn alloc(&mut self, size: u64, align: u64) -> std::result::Result<u64, PciDeviceError> {
        // Round the cursor up to the requested alignment; an alignment of 0
        // means no alignment is required.
        let start_address = if align > 0 {
            ((self.current_address + align - 1) / align) * align
        } else {
            self.current_address
        };
        self.current_address = start_address + size;
        Ok(start_address)
    }
}
1125 
impl Suspendable for VirtioPciDevice {
    /// Stops device activity and records which queues (if any) were active so
    /// that `wake` or `snapshot` can use them later.
    fn sleep(&mut self) -> anyhow::Result<()> {
        // If the device is already asleep, we should not request it to sleep again.
        if self.sleep_state.is_some() {
            return Ok(());
        }

        // Don't call `self.device.virtio_sleep()` for vhost user devices if the device is not
        // activated yet, since it will always return an empty Vec.
        if !self.device_activated && self.device.is_vhost_user() {
            // This will need to be set, so that a cold restore will work.
            self.sleep_state = Some(SleepState::Inactive);
            return Ok(());
        }
        if let Some(queues) = self.device.virtio_sleep()? {
            // A device may only hand back queues if it was activated.
            anyhow::ensure!(
                self.device_activated,
                format!(
                    "unactivated device {} returned queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Active {
                activated_queues: queues,
            });
        } else {
            // Conversely, an activated device must return its queues.
            anyhow::ensure!(
                !self.device_activated,
                format!(
                    "activated device {} didn't return queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Inactive);
        }
        Ok(())
    }

    /// Resumes a slept device, handing back any queues captured by `sleep`.
    fn wake(&mut self) -> anyhow::Result<()> {
        // A vhost user device that isn't activated doesn't need to be woken up.
        if !self.device_activated && self.device.is_vhost_user() {
            self.sleep_state = None;
            return Ok(());
        }
        match self.sleep_state.take() {
            None => {
                // If the device is already awake, we should not request it to wake again.
            }
            Some(SleepState::Inactive) => {
                self.device.virtio_wake(None).with_context(|| {
                    format!(
                        "virtio_wake failed for {}, can't recover",
                        self.debug_label(),
                    )
                })?;
            }
            Some(SleepState::Active { activated_queues }) => {
                // Hand the memory, interrupt, and captured queues back to the
                // device so it can resume processing.
                self.device
                    .virtio_wake(Some((
                        self.mem.clone(),
                        self.interrupt
                            .clone()
                            .expect("interrupt missing for already active queues"),
                        activated_queues,
                    )))
                    .with_context(|| {
                        format!(
                            "virtio_wake failed for {}, can't recover",
                            self.debug_label(),
                        )
                    })?;
            }
        };
        Ok(())
    }

    /// Serializes the transport and device state to JSON. Must be called
    /// while asleep: the active-queue state lives in `sleep_state`.
    fn snapshot(&mut self) -> anyhow::Result<serde_json::Value> {
        if self.iommu.is_some() {
            return Err(anyhow!("Cannot snapshot if iommu is present."));
        }

        serde_json::to_value(VirtioPciDeviceSnapshot {
            config_regs: self.config_regs.snapshot()?,
            inner_device: self.device.virtio_snapshot()?,
            device_activated: self.device_activated,
            interrupt: self.interrupt.as_ref().map(|i| i.snapshot()),
            msix_config: self.msix_config.lock().snapshot()?,
            common_config: self.common_config,
            queues: self
                .queues
                .iter()
                .map(|q| q.snapshot())
                .collect::<anyhow::Result<Vec<_>>>()?,
            activated_queues: match &self.sleep_state {
                None => {
                    anyhow::bail!("tried snapshotting while awake")
                }
                Some(SleepState::Inactive) => None,
                Some(SleepState::Active { activated_queues }) => {
                    let mut serialized_queues = Vec::new();
                    for (index, queue) in activated_queues.iter() {
                        serialized_queues.push((*index, queue.snapshot()?));
                    }
                    Some(serialized_queues)
                }
            },
        })
        .context("failed to serialize VirtioPciDeviceSnapshot")
    }

    /// Restores state saved by `snapshot`. Must be called while asleep and
    /// before the device has been activated.
    fn restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
        // Restoring from an activated state is more complex and low priority, so just fail for
        // now. We'll need to reset the device before restoring, e.g. must call
        // self.unregister_ioevents().
        anyhow::ensure!(
            !self.device_activated,
            "tried to restore after virtio device activated. not supported yet"
        );

        let deser: VirtioPciDeviceSnapshot = serde_json::from_value(data)?;

        self.config_regs.restore(deser.config_regs)?;
        self.device_activated = deser.device_activated;

        self.msix_config.lock().restore(deser.msix_config)?;
        self.common_config = deser.common_config;

        assert_eq!(
            self.queues.len(),
            deser.queues.len(),
            "device must have the same number of queues"
        );
        for (q, s) in self.queues.iter_mut().zip(deser.queues.into_iter()) {
            q.restore(s)?;
        }

        // Verify we are asleep and inactive.
        match &self.sleep_state {
            None => {
                anyhow::bail!("tried restoring while awake")
            }
            Some(SleepState::Inactive) => {}
            Some(SleepState::Active { .. }) => {
                anyhow::bail!("tried to restore after virtio device activated. not supported yet")
            }
        };
        // Restore `sleep_state`.
        if let Some(activated_queues_snapshot) = deser.activated_queues {
            // Rebuild each activated queue from its snapshot, pairing it with
            // the matching queue config and a clone of its doorbell event.
            let mut activated_queues = BTreeMap::new();
            for (index, queue_snapshot) in activated_queues_snapshot {
                let queue_config = self
                    .queues
                    .get(index)
                    .with_context(|| format!("missing queue config for activated queue {index}"))?;
                let queue_evt = self
                    .queue_evts
                    .get(index)
                    .with_context(|| format!("missing queue event for activated queue {index}"))?
                    .event
                    .try_clone()
                    .context("failed to clone queue event")?;
                activated_queues.insert(
                    index,
                    Queue::restore(queue_config, queue_snapshot, &self.mem, queue_evt)?,
                );
            }

            // Restore the activated queues.
            self.sleep_state = Some(SleepState::Active { activated_queues });
        } else {
            self.sleep_state = Some(SleepState::Inactive);
        }

        // Also replicate the other work in activate: initialize the interrupt and queues
        // events. This could just as easily be done in `wake` instead.
        // NOTE: Needs to be done last in `restore` because it relies on the other VirtioPciDevice
        // fields.
        if let Some(deser_interrupt) = deser.interrupt {
            self.interrupt = Some(Interrupt::new_from_snapshot(
                self.interrupt_evt
                    .as_ref()
                    .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                    .try_clone()
                    .with_context(|| {
                        format!("{} failed to clone interrupt_evt", self.debug_label())
                    })?,
                Some(self.msix_config.clone()),
                self.common_config.msix_config,
                deser_interrupt,
                #[cfg(target_arch = "x86_64")]
                Some(PmWakeupEvent::new(
                    self.vm_control_tube.clone(),
                    self.pm_config.clone(),
                    MetricEventType::VirtioWakeup {
                        virtio_id: self.device.device_type() as u32,
                    },
                )),
            ));
        }

        // Call register_io_events for the activated queue events.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;
        self.queues
            .iter()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .try_for_each(|((queue_index, _queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                Ok::<(), anyhow::Error>(())
            })?;

        // There might be data in the queue that wasn't drained by the device
        // at the time it was snapshotted. In this case, the doorbell should
        // still be signaled. If it is not, the driver may never re-trigger the
        // doorbell, and the device will stall. So here, we explicitly signal
        // every doorbell. Spurious doorbells are safe (devices will check their
        // queue, realize nothing is there, and go back to sleep.)
        self.queue_evts.iter_mut().try_for_each(|queue_event| {
            queue_event
                .event
                .signal()
                .context("failed to wake doorbell")
        })?;

        if self.device.is_vhost_user() {
            // Vhost-user devices get the queue events and interrupt handed
            // over explicitly when the device was activated at snapshot time.
            let (queue_evts, interrupt) = if self.device_activated {
                (
                    Some(
                        self.queue_evts
                            .iter()
                            .map(|queue_evt| {
                                queue_evt
                                    .event
                                    .try_clone()
                                    .context("Failed to clone queue_evt")
                            })
                            .collect::<anyhow::Result<Vec<_>>>()?,
                    ),
                    Some(
                        self.interrupt
                            .as_ref()
                            .expect("Interrupt should not be empty if device was activated.")
                            .clone(),
                    ),
                )
            } else {
                (None, None)
            };
            self.device.vhost_user_restore(
                deser.inner_device,
                &self.queues,
                queue_evts,
                interrupt,
                self.mem.clone(),
                &self.msix_config,
                self.device_activated,
            )?;
        } else {
            self.device.virtio_restore(deser.inner_device)?;
        }

        Ok(())
    }
}
1401 
/// Forwards shared-memory mapping requests from a virtio device to the VM,
/// installing them inside the device's shared memory BAR allocation.
struct VmRequester {
    /// Client used to register/unregister memory regions with the VM.
    vm_memory_client: VmMemoryClient,
    /// The PCI BAR allocation that mappings are placed into.
    alloc: Alloc,
    /// Maps BAR offset -> registered region id, so mappings can be removed.
    mappings: BTreeMap<u64, VmMemoryRegionId>,
    /// When true, `prepare_shared_memory_region` must run before the first
    /// mapping is registered.
    needs_prepare: bool,
}
1408 
1409 impl VmRequester {
new(vm_memory_client: VmMemoryClient, alloc: Alloc, do_prepare: bool) -> Self1410     fn new(vm_memory_client: VmMemoryClient, alloc: Alloc, do_prepare: bool) -> Self {
1411         Self {
1412             vm_memory_client,
1413             alloc,
1414             mappings: BTreeMap::new(),
1415             needs_prepare: do_prepare,
1416         }
1417     }
1418 }
1419 
1420 impl SharedMemoryMapper for VmRequester {
add_mapping( &mut self, source: VmMemorySource, offset: u64, prot: Protection, cache: MemCacheType, ) -> anyhow::Result<()>1421     fn add_mapping(
1422         &mut self,
1423         source: VmMemorySource,
1424         offset: u64,
1425         prot: Protection,
1426         cache: MemCacheType,
1427     ) -> anyhow::Result<()> {
1428         if self.needs_prepare {
1429             self.vm_memory_client
1430                 .prepare_shared_memory_region(self.alloc, cache)
1431                 .context("prepare_shared_memory_region failed")?;
1432             self.needs_prepare = false;
1433         }
1434 
1435         let id = self
1436             .vm_memory_client
1437             .register_memory(
1438                 source,
1439                 VmMemoryDestination::ExistingAllocation {
1440                     allocation: self.alloc,
1441                     offset,
1442                 },
1443                 prot,
1444                 cache,
1445             )
1446             .context("register_memory failed")?;
1447 
1448         self.mappings.insert(offset, id);
1449         Ok(())
1450     }
1451 
remove_mapping(&mut self, offset: u64) -> anyhow::Result<()>1452     fn remove_mapping(&mut self, offset: u64) -> anyhow::Result<()> {
1453         let id = self.mappings.remove(&offset).context("invalid offset")?;
1454         self.vm_memory_client
1455             .unregister_memory(id)
1456             .context("unregister_memory failed")
1457     }
1458 
as_raw_descriptor(&self) -> Option<RawDescriptor>1459     fn as_raw_descriptor(&self) -> Option<RawDescriptor> {
1460         Some(self.vm_memory_client.as_raw_descriptor())
1461     }
1462 }
1463 
#[cfg(test)]
mod tests {

    #[cfg(feature = "pci-hotplug")]
    #[test]
    fn allocate_aligned_address() {
        let mut allocator = super::SimpleAllocator::new(0);
        // Cursor 0 already satisfies 0x80 alignment; occupies [0x0, 0x20).
        assert_eq!(allocator.alloc(0x20, 0x80).unwrap(), 0);
        // Cursor 0x20 rounds up to 0x40; occupies [0x40, 0x80).
        assert_eq!(allocator.alloc(0x40, 0x40).unwrap(), 0x40);
        // Cursor 0x80 is already aligned; occupies [0x80, 0x108).
        assert_eq!(allocator.alloc(0x88, 0x80).unwrap(), 0x80);
        // Cursor 0x108 rounds up to 0x180; occupies [0x180, 0x1b0).
        assert_eq!(allocator.alloc(0x30, 0x80).unwrap(), 0x180);
    }
}
1481