// Copyright 2018 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::collections::BTreeMap;
use std::sync::Arc;

#[cfg(target_arch = "x86_64")]
use acpi_tables::sdt::SDT;
use anyhow::anyhow;
use anyhow::Context;
use base::error;
use base::trace;
use base::AsRawDescriptor;
use base::AsRawDescriptors;
use base::Event;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SharedMemory;
use base::Tube;
use data_model::Le32;
use hypervisor::Datamatch;
use hypervisor::MemCacheType;
use libc::ERANGE;
#[cfg(target_arch = "x86_64")]
use metrics::MetricEventType;
use resources::Alloc;
use resources::AllocOptions;
use resources::SystemAllocator;
use serde::Deserialize;
use serde::Serialize;
use sync::Mutex;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER_OK;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FAILED;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FEATURES_OK;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_NEEDS_RESET;
use vm_control::api::VmMemoryClient;
use vm_control::VmMemoryDestination;
use vm_control::VmMemoryRegionId;
use vm_control::VmMemorySource;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use zerocopy::AsBytes;
use zerocopy::FromBytes;
use zerocopy::FromZeroes;

use self::virtio_pci_common_config::VirtioPciCommonConfig;
use super::*;
#[cfg(target_arch = "x86_64")]
use crate::acpi::PmWakeupEvent;
#[cfg(target_arch = "x86_64")]
use crate::pci::pm::PciDevicePower;
use crate::pci::pm::PciPmCap;
use crate::pci::pm::PmConfig;
use crate::pci::pm::PmStatusChange;
use crate::pci::BarRange;
use crate::pci::MsixCap;
use crate::pci::MsixConfig;
use crate::pci::MsixStatus;
use crate::pci::PciAddress;
use crate::pci::PciBarConfiguration;
use crate::pci::PciBarIndex;
use crate::pci::PciBarPrefetchable;
use crate::pci::PciBarRegionType;
use crate::pci::PciCapability;
use crate::pci::PciCapabilityID;
use crate::pci::PciClassCode;
use crate::pci::PciConfiguration;
use crate::pci::PciDevice;
use crate::pci::PciDeviceError;
use crate::pci::PciDisplaySubclass;
use crate::pci::PciHeaderType;
use crate::pci::PciId;
use crate::pci::PciInterruptPin;
use crate::pci::PciMassStorageSubclass;
use crate::pci::PciSubclass;
use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
#[cfg(feature = "pci-hotplug")]
use crate::HotPluggable;
use crate::IrqLevelEvent;
use crate::Suspendable;

#[repr(u8)]
#[derive(Debug, Copy, Clone, enumn::N)]
pub enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    // Doorbell, Notification and SharedMemory are Virtio Vhost User related PCI
    // capabilities. Specified in 5.7.7.4 here
    // https://stefanha.github.io/virtio/vhost-user-slave.html#x1-2830007.
    DoorbellConfig = 6,
    NotificationConfig = 7,
    SharedMemoryConfig = 8,
}

#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromZeroes, FromBytes, AsBytes)]
pub struct VirtioPciCap {
    // cap_vndr and cap_next are autofilled based on id() in pci configuration
    pub cap_vndr: u8,  // Generic PCI field: PCI_CAP_ID_VNDR
    pub cap_next: u8,  // Generic PCI field: next ptr
    pub cap_len: u8,   // Generic PCI field: capability length
    pub cfg_type: u8,  // Identifies the structure.
    pub bar: u8,       // Where to find it.
    id: u8,            // Multiple capabilities of the same type
    padding: [u8; 2],  // Pad to full dword.
    pub offset: Le32,  // Offset within bar.
    pub length: Le32,  // Length of the structure, in bytes.
}

impl PciCapability for VirtioPciCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 4]
    }
}

impl VirtioPciCap {
    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self {
        VirtioPciCap {
            cap_vndr: 0,
            cap_next: 0,
            cap_len: std::mem::size_of::<VirtioPciCap>() as u8,
            cfg_type: cfg_type as u8,
            bar,
            id: 0,
            padding: [0; 2],
            offset: Le32::from(offset),
            length: Le32::from(length),
        }
    }

    pub fn set_cap_len(&mut self, cap_len: u8) {
        self.cap_len = cap_len;
    }
}

#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
pub struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    notify_off_multiplier: Le32,
}

impl PciCapability for VirtioPciNotifyCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 5]
    }
}

impl VirtioPciNotifyCap {
    pub fn new(
        cfg_type: PciCapabilityType,
        bar: u8,
        offset: u32,
        length: u32,
        multiplier: Le32,
    ) -> Self {
        VirtioPciNotifyCap {
            cap: VirtioPciCap {
                cap_vndr: 0,
                cap_next: 0,
                cap_len: std::mem::size_of::<VirtioPciNotifyCap>() as u8,
                cfg_type: cfg_type as u8,
                bar,
                id: 0,
                padding: [0; 2],
                offset: Le32::from(offset),
                length: Le32::from(length),
            },
            notify_off_multiplier: multiplier,
        }
    }
}

#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
pub struct VirtioPciShmCap {
    cap: VirtioPciCap,
    offset_hi: Le32, // Most sig 32 bits of offset
    length_hi: Le32, // Most sig 32 bits of length
}

impl PciCapability for VirtioPciShmCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 6]
    }
}

impl VirtioPciShmCap {
    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self {
        VirtioPciShmCap {
            cap: VirtioPciCap {
                cap_vndr: 0,
                cap_next: 0,
                cap_len: std::mem::size_of::<VirtioPciShmCap>() as u8,
                cfg_type: cfg_type as u8,
                bar,
                id: shmid,
                padding: [0; 2],
                offset: Le32::from(offset as u32),
                length: Le32::from(length as u32),
            },
            offset_hi: Le32::from((offset >> 32) as u32),
            length_hi: Le32::from((length >> 32) as u32),
        }
    }
}

/// Subclasses for virtio.
#[allow(dead_code)]
#[derive(Copy, Clone)]
pub enum PciVirtioSubclass {
    NonTransitionalBase = 0xff,
}

impl PciSubclass for PciVirtioSubclass {
    fn get_register_value(&self) -> u8 {
        *self as u8
    }
}

// Allocate one bar for the structs pointed to by the capability structures.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const COMMON_CONFIG_LAST: u64 = COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE - 1;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x1000;
const ISR_CONFIG_SIZE: u64 = 1;
const ISR_CONFIG_LAST: u64 = ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE - 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const DEVICE_CONFIG_LAST: u64 = DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE - 1;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const NOTIFICATION_LAST: u64 = NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE - 1;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_TABLE_LAST: u64 = MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE - 1;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const MSIX_PBA_LAST: u64 = MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE - 1;
const CAPABILITY_BAR_SIZE: u64 = 0x8000;
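
// Added compile-time sanity sketch (illustrative, not part of the original
// source): the fixed regions above are assumed to be laid out in increasing
// order inside the single capability BAR. Plain `const` assertions restate
// that assumption so a layout change fails to build.
const _: () = assert!(COMMON_CONFIG_LAST < ISR_CONFIG_BAR_OFFSET);
const _: () = assert!(NOTIFICATION_LAST < MSIX_TABLE_BAR_OFFSET);
const _: () = assert!(MSIX_PBA_LAST < CAPABILITY_BAR_SIZE);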

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
const VIRTIO_PCI_REVISION_ID: u8 = 1;

const CAPABILITIES_BAR_NUM: usize = 0;
const SHMEM_BAR_NUM: usize = 2;
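// Note (added for clarity): BAR 0 above holds the fixed virtio structures,
// while the shared memory region, when a device exposes one, is advertised as
// a 64-bit prefetchable BAR and therefore consumes BAR registers 2 and 3.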

struct QueueEvent {
    event: Event,
    ioevent_registered: bool,
}

/// Implements the
/// [PCI](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-650001)
/// transport for virtio devices.
pub struct VirtioPciDevice {
    config_regs: PciConfiguration,
    preferred_address: Option<PciAddress>,
    pci_address: Option<PciAddress>,

    device: Box<dyn VirtioDevice>,
    device_activated: bool,
    disable_intx: bool,

    interrupt: Option<Interrupt>,
    interrupt_evt: Option<IrqLevelEvent>,
    queues: Vec<QueueConfig>,
    queue_evts: Vec<QueueEvent>,
    mem: GuestMemory,
    settings_bar: PciBarIndex,
    msix_config: Arc<Mutex<MsixConfig>>,
    pm_config: Arc<Mutex<PmConfig>>,
    common_config: VirtioPciCommonConfig,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // API client that is present if the device has shared memory regions, and
    // is used to map/unmap files into the shared memory region.
    shared_memory_vm_memory_client: Option<VmMemoryClient>,

    // API client for registration of ioevents when PCI BAR reprogramming is detected.
    ioevent_vm_memory_client: VmMemoryClient,

    // State only present while asleep.
    sleep_state: Option<SleepState>,

    vm_control_tube: Arc<Mutex<Tube>>,
}

enum SleepState {
    // Asleep and device hasn't been activated yet by the guest.
    Inactive,
    // Asleep and device has been activated by the guest.
    Active {
        /// The queues returned from `VirtioDevice::virtio_sleep`.
        /// Map is from queue index -> Queue.
        activated_queues: BTreeMap<usize, Queue>,
    },
}

#[derive(Serialize, Deserialize)]
struct VirtioPciDeviceSnapshot {
    config_regs: serde_json::Value,

    inner_device: serde_json::Value,
    device_activated: bool,

    interrupt: Option<InterruptSnapshot>,
    msix_config: serde_json::Value,
    common_config: VirtioPciCommonConfig,

    queues: Vec<serde_json::Value>,
    activated_queues: Option<Vec<(usize, serde_json::Value)>>,
}

impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    pub fn new(
        mem: GuestMemory,
        device: Box<dyn VirtioDevice>,
        msi_device_tube: Tube,
        disable_intx: bool,
        shared_memory_vm_memory_client: Option<VmMemoryClient>,
        ioevent_vm_memory_client: VmMemoryClient,
        vm_control_tube: Tube,
    ) -> Result<Self> {
        // shared_memory_vm_memory_client is required if there are shared memory regions.
        assert_eq!(
            device.get_shared_memory_region().is_none(),
            shared_memory_vm_memory_client.is_none()
        );

        let mut queue_evts = Vec::new();
        for _ in device.queue_max_sizes() {
            queue_evts.push(QueueEvent {
                event: Event::new()?,
                ioevent_registered: false,
            });
        }
        let queues = device
            .queue_max_sizes()
            .iter()
            .map(|&s| QueueConfig::new(s, device.features()))
            .collect();

        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

        let (pci_device_class, pci_device_subclass) = match device.device_type() {
            DeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Gpu => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            _ => (
                PciClassCode::TooOld,
                &PciVirtioSubclass::NonTransitionalBase as &dyn PciSubclass,
            ),
        };

        let num_interrupts = device.num_interrupts();

        // One MSI-X vector per queue plus one for configuration changes.
        let msix_num = u16::try_from(num_interrupts + 1).map_err(|_| base::Error::new(ERANGE))?;
        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_num,
            msi_device_tube,
            PciId::new(VIRTIO_PCI_VENDOR_ID, pci_device_id).into(),
            device.debug_label(),
        )));

        let config_regs = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            pci_device_class,
            pci_device_subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            VIRTIO_PCI_REVISION_ID,
        );

        Ok(VirtioPciDevice {
            config_regs,
            preferred_address: device.pci_address(),
            pci_address: None,
            device,
            device_activated: false,
            disable_intx,
            interrupt: None,
            interrupt_evt: None,
            queues,
            queue_evts,
            mem,
            settings_bar: 0,
            msix_config,
            pm_config: Arc::new(Mutex::new(PmConfig::new(true))),
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: VIRTIO_MSI_NO_VECTOR,
            },
            iommu: None,
            shared_memory_vm_memory_client,
            ioevent_vm_memory_client,
            sleep_state: None,
            vm_control_tube: Arc::new(Mutex::new(vm_control_tube)),
        })
    }

    fn is_driver_ready(&self) -> bool {
        let ready_bits = (VIRTIO_CONFIG_S_ACKNOWLEDGE
            | VIRTIO_CONFIG_S_DRIVER
            | VIRTIO_CONFIG_S_DRIVER_OK
            | VIRTIO_CONFIG_S_FEATURES_OK) as u8;
        (self.common_config.driver_status & ready_bits) == ready_bits
            && self.common_config.driver_status & VIRTIO_CONFIG_S_FAILED as u8 == 0
    }

    /// Determines if the driver has requested the device reset itself
    fn is_reset_requested(&self) -> bool {
        self.common_config.driver_status == DEVICE_RESET as u8
    }

    fn add_settings_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&common_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&isr_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&device_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.config_regs
            .add_capability(&notify_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - How will the configuration_cap work?
        let configuration_cap = VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0);
        self.config_regs
            .add_capability(&configuration_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let msix_cap = MsixCap::new(
            settings_bar,
            self.msix_config.lock().num_vectors(),
            MSIX_TABLE_BAR_OFFSET as u32,
            settings_bar,
            MSIX_PBA_BAR_OFFSET as u32,
        );
        self.config_regs
            .add_capability(&msix_cap, Some(Box::new(self.msix_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.config_regs
            .add_capability(&PciPmCap::new(), Some(Box::new(self.pm_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.settings_bar = settings_bar as PciBarIndex;
        Ok(())
    }

    /// Activates the underlying `VirtioDevice`. `assign_irq` has to be called first.
    fn activate(&mut self) -> anyhow::Result<()> {
        let interrupt = Interrupt::new(
            self.interrupt_evt
                .as_ref()
                .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                .try_clone()
                .with_context(|| format!("{} failed to clone interrupt_evt", self.debug_label()))?,
            Some(self.msix_config.clone()),
            self.common_config.msix_config,
            #[cfg(target_arch = "x86_64")]
            Some(PmWakeupEvent::new(
                self.vm_control_tube.clone(),
                self.pm_config.clone(),
                MetricEventType::VirtioWakeup {
                    virtio_id: self.device.device_type() as u32,
                },
            )),
        );
        self.interrupt = Some(interrupt.clone());

        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        // Use ready queues and their events.
        let queues = self
            .queues
            .iter_mut()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .map(|((queue_index, queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                let queue_evt = evt.event.try_clone().context("failed to clone queue_evt")?;
                Ok((
                    queue_index,
                    queue
                        .activate(&self.mem, queue_evt)
                        .context("failed to activate queue")?,
                ))
            })
            .collect::<anyhow::Result<BTreeMap<usize, Queue>>>()?;

        if let Err(e) = self.device.activate(self.mem.clone(), interrupt, queues) {
            error!("{} activate failed: {:#}", self.debug_label(), e);
            self.common_config.driver_status |= VIRTIO_CONFIG_S_NEEDS_RESET as u8;
        } else {
            self.device_activated = true;
        }

        Ok(())
    }

    fn unregister_ioevents(&mut self) -> anyhow::Result<()> {
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        for (queue_index, evt) in self.queue_evts.iter_mut().enumerate() {
            if evt.ioevent_registered {
                self.ioevent_vm_memory_client
                    .unregister_io_event(
                        evt.event.try_clone().context("failed to clone Event")?,
                        notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                        Datamatch::AnyLength,
                    )
                    .context("failed to unregister ioevent")?;
                evt.ioevent_registered = false;
            }
        }
        Ok(())
    }

    pub fn virtio_device(&self) -> &dyn VirtioDevice {
        self.device.as_ref()
    }

    pub fn pci_address(&self) -> Option<PciAddress> {
        self.pci_address
    }

    #[cfg(target_arch = "x86_64")]
    fn handle_pm_status_change(&mut self, status: &PmStatusChange) {
        if let Some(interrupt) = self.interrupt.as_mut() {
            interrupt.set_wakeup_event_active(status.to == PciDevicePower::D3)
        }
    }

    #[cfg(not(target_arch = "x86_64"))]
    fn handle_pm_status_change(&mut self, _status: &PmStatusChange) {}
}

impl PciDevice for VirtioPciDevice {
    fn debug_label(&self) -> String {
        format!("pci{}", self.device.debug_label())
    }

    fn preferred_address(&self) -> Option<PciAddress> {
        self.preferred_address
    }

    fn allocate_address(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<PciAddress, PciDeviceError> {
        if self.pci_address.is_none() {
            if let Some(address) = self.preferred_address {
                if !resources.reserve_pci(
                    Alloc::PciBar {
                        bus: address.bus,
                        dev: address.dev,
                        func: address.func,
                        bar: 0,
                    },
                    self.debug_label(),
                ) {
                    return Err(PciDeviceError::PciAllocationFailed);
                }
                self.pci_address = Some(address);
            } else {
                self.pci_address = match resources.allocate_pci(0, self.debug_label()) {
                    Some(Alloc::PciBar {
                        bus,
                        dev,
                        func,
                        bar: _,
                    }) => Some(PciAddress { bus, dev, func }),
                    _ => None,
                }
            }
        }
        self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
    }

    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut rds = self.device.keep_rds();
        rds.extend(
            self.queue_evts
                .iter()
                .map(|qe| qe.event.as_raw_descriptor()),
        );
        if let Some(interrupt_evt) = &self.interrupt_evt {
            rds.extend(interrupt_evt.as_raw_descriptors());
        }
        let descriptor = self.msix_config.lock().get_msi_socket();
        rds.push(descriptor);
        if let Some(iommu) = &self.iommu {
            rds.append(&mut iommu.lock().as_raw_descriptors());
        }
        rds.push(self.ioevent_vm_memory_client.as_raw_descriptor());
        rds.push(self.vm_control_tube.lock().as_raw_descriptor());
        rds
    }

    fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
        self.interrupt_evt = Some(irq_evt);
        if !self.disable_intx {
            self.config_regs.set_irq(irq_num as u8, pin);
        }
    }

    fn allocate_io_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_io_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-cap_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }

    fn allocate_device_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_device_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-custom_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }

    fn destroy_device(&mut self) {
        if let Err(e) = self.unregister_ioevents() {
            error!("error destroying {}: {:?}", &self.debug_label(), &e);
        }
    }

    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
        self.config_regs.get_bar_configuration(bar_num)
    }

    fn register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut caps = self.device.get_device_caps();
        if let Some(region) = self.device.get_shared_memory_region() {
            caps.push(Box::new(VirtioPciShmCap::new(
                PciCapabilityType::SharedMemoryConfig,
                SHMEM_BAR_NUM as u8,
                0,
                region.length,
                region.id,
            )));
        }

        for cap in caps {
            self.config_regs
                .add_capability(&*cap, None)
                .map_err(PciDeviceError::CapabilitiesSetup)?;
        }

        Ok(())
    }

    fn read_config_register(&self, reg_idx: usize) -> u32 {
        self.config_regs.read_reg(reg_idx)
    }

    fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
        if let Some(res) = self.config_regs.write_reg(reg_idx, offset, data) {
            if let Some(msix_behavior) = res.downcast_ref::<MsixStatus>() {
                self.device.control_notify(*msix_behavior);
            } else if let Some(status) = res.downcast_ref::<PmStatusChange>() {
                self.handle_pm_status_change(status);
            }
        }
    }

    fn setup_pci_config_mapping(
        &mut self,
        shmem: &SharedMemory,
        base: usize,
        len: usize,
    ) -> std::result::Result<bool, PciDeviceError> {
        self.config_regs
            .setup_mapping(shmem, base, len)
            .map(|_| true)
            .map_err(PciDeviceError::MmioSetup)
    }

    fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.read(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    if let Some(v) = data.get_mut(0) {
                        // Reading this register resets it to 0.
                        *v = if let Some(interrupt) = &self.interrupt {
                            interrupt.read_and_reset_interrupt_status()
                        } else {
                            0
                        };
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .read_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Handled with ioevents.
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    self.msix_config
                        .lock()
                        .read_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .read_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        } else {
            self.device.read_bar(bar_index, offset, data);
        }
    }

    fn write_bar(&mut self, bar_index: usize, offset: u64, data: &[u8]) {
        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.write(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    if let Some(v) = data.first() {
                        if let Some(interrupt) = &self.interrupt {
                            interrupt.clear_interrupt_status_bits(*v);
                        }
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .write_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Notifications are normally handled with ioevents inside the hypervisor and
                    // do not reach write_bar(). However, if the ioevent registration hasn't
                    // finished yet, it is possible for a write to the notification region to make
                    // it through as a normal MMIO exit and end up here. To handle that case,
                    // provide a fallback that looks up the corresponding queue for the offset and
                    // triggers its event, which is equivalent to what the ioevent would do.
                    let queue_index = (offset - NOTIFICATION_BAR_OFFSET) as usize
                        / NOTIFY_OFF_MULTIPLIER as usize;
                    trace!("write_bar notification fallback for queue {}", queue_index);
                    if let Some(evt) = self.queue_evts.get(queue_index) {
                        let _ = evt.event.signal();
                    }
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    let behavior = self
                        .msix_config
                        .lock()
                        .write_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                    self.device.control_notify(behavior);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .write_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        } else {
            self.device.write_bar(bar_index, offset, data);
        }

        if !self.device_activated && self.is_driver_ready() {
            if let Err(e) = self.activate() {
                error!("failed to activate device: {:#}", e);
            }
        }

        // Device has been reset by the driver
        if self.device_activated && self.is_reset_requested() {
            if let Err(e) = self.device.reset() {
                error!("failed to reset {} device: {:#}", self.debug_label(), e);
            } else {
                self.device_activated = false;
                // reset queues
                self.queues.iter_mut().for_each(QueueConfig::reset);
                // select queue 0 by default
                self.common_config.queue_select = 0;
                if let Err(e) = self.unregister_ioevents() {
                    error!("failed to unregister ioevents: {:#}", e);
                }
            }
        }
    }

    fn on_device_sandboxed(&mut self) {
        self.device.on_device_sandboxed();
    }

    #[cfg(target_arch = "x86_64")]
    fn generate_acpi(&mut self, sdts: Vec<SDT>) -> Option<Vec<SDT>> {
        self.device.generate_acpi(&self.pci_address, sdts)
    }

    fn as_virtio_pci_device(&self) -> Option<&VirtioPciDevice> {
        Some(self)
    }
}

fn allocate_io_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_io_bars");
    // Allocate one bar for the structures pointed to by the capability structures.
    let settings_config_addr = alloc_fn(
        CAPABILITY_BAR_SIZE,
        Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: 0,
        },
        AllocOptions::new()
            .max_address(u32::MAX.into())
            .align(CAPABILITY_BAR_SIZE),
    )?;
    let config = PciBarConfiguration::new(
        CAPABILITIES_BAR_NUM,
        CAPABILITY_BAR_SIZE,
        PciBarRegionType::Memory32BitRegion,
        PciBarPrefetchable::NotPrefetchable,
    )
    .set_address(settings_config_addr);
    let settings_bar = virtio_pci_device
        .config_regs
        .add_pci_bar(config)
        .map_err(|e| PciDeviceError::IoRegistrationFailed(settings_config_addr, e))?
        as u8;
    // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
    virtio_pci_device.add_settings_pci_capabilities(settings_bar)?;

    Ok(vec![BarRange {
        addr: settings_config_addr,
        size: CAPABILITY_BAR_SIZE,
        prefetchable: false,
    }])
}

fn allocate_device_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_device_bars");

    let configs = virtio_pci_device.device.get_device_bars(address);
    let configs = if !configs.is_empty() {
        configs
    } else {
        let region = match virtio_pci_device.device.get_shared_memory_region() {
            None => return Ok(Vec::new()),
            Some(r) => r,
        };
        let config = PciBarConfiguration::new(
            SHMEM_BAR_NUM,
            region
                .length
                .checked_next_power_of_two()
                .expect("bar too large"),
            PciBarRegionType::Memory64BitRegion,
            PciBarPrefetchable::Prefetchable,
        );

        let alloc = Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: config.bar_index() as u8,
        };

        virtio_pci_device
            .device
            .set_shared_memory_mapper(Box::new(VmRequester::new(
                virtio_pci_device
                    .shared_memory_vm_memory_client
                    .take()
                    .expect("missing shared_memory_tube"),
                alloc,
                // See comment on VmMemoryRequest::execute
                !virtio_pci_device
                    .device
                    .expose_shmem_descriptors_with_viommu(),
            )));

        vec![config]
    };
    let mut ranges = vec![];
    for config in configs {
        let device_addr = alloc_fn(
            config.size(),
            Alloc::PciBar {
                bus: address.bus,
                dev: address.dev,
                func: address.func,
                bar: config.bar_index() as u8,
            },
            AllocOptions::new()
                .prefetchable(config.is_prefetchable())
                .align(config.size()),
        )?;
        let config = config.set_address(device_addr);
        let _device_bar = virtio_pci_device
            .config_regs
            .add_pci_bar(config)
            .map_err(|e| PciDeviceError::IoRegistrationFailed(device_addr, e))?;
        ranges.push(BarRange {
            addr: device_addr,
            size: config.size(),
            prefetchable: false,
        });
    }

    if virtio_pci_device
        .device
        .get_shared_memory_region()
        .is_some()
    {
        virtio_pci_device
            .device
            .set_shared_memory_region_base(GuestAddress(ranges[0].addr));
    }

    Ok(ranges)
}

#[cfg(feature = "pci-hotplug")]
impl HotPluggable for VirtioPciDevice {
    /// Sets PciAddress to pci_addr
    fn set_pci_address(&mut self, pci_addr: PciAddress) -> std::result::Result<(), PciDeviceError> {
        self.pci_address = Some(pci_addr);
        Ok(())
    }

    /// Configures IO BAR layout without memory alloc.
    fn configure_io_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut simple_allocator = SimpleAllocator::new(0);
        allocate_io_bars(self, |size, _, _| simple_allocator.alloc(size, size)).map(|_| ())
    }

    /// Configures device BAR layout without memory alloc.
    fn configure_device_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // For device BARs, the space for CAPABILITY_BAR_SIZE should be skipped.
        let mut simple_allocator = SimpleAllocator::new(CAPABILITY_BAR_SIZE);
        allocate_device_bars(self, |size, _, _| simple_allocator.alloc(size, size)).map(|_| ())
    }
}

#[cfg(feature = "pci-hotplug")]
/// A simple allocator that can allocate non-overlapping aligned intervals.
///
/// The addresses allocated are not exclusively reserved for the device, and cannot be used for a
/// static device. The allocated placeholder address describes the layout of PCI BARs for
/// hotplugged devices. Actual memory allocation is handled by PCI BAR reprogramming initiated by
/// the guest OS.
struct SimpleAllocator {
    current_address: u64,
}

#[cfg(feature = "pci-hotplug")]
impl SimpleAllocator {
    /// Constructs a SimpleAllocator. Addresses will start at or after base_address.
    fn new(base_address: u64) -> Self {
        Self {
            current_address: base_address,
        }
    }

    /// Allocates an interval with the given size and alignment. Returns the start address.
    fn alloc(&mut self, size: u64, align: u64) -> std::result::Result<u64, PciDeviceError> {
        if align > 0 {
            // Align current_address upward to the next multiple of align.
            self.current_address = (self.current_address + align - 1) / align * align;
        }
        let start_address = self.current_address;
        self.current_address += size;
        Ok(start_address)
    }
}

impl Suspendable for VirtioPciDevice {
    fn sleep(&mut self) -> anyhow::Result<()> {
        // If the device is already asleep, we should not request it to sleep again.
        if self.sleep_state.is_some() {
            return Ok(());
        }

        // Don't call `self.device.virtio_sleep()` for vhost user devices if the device is not
        // activated yet, since it will always return an empty Vec.
        if !self.device_activated && self.device.is_vhost_user() {
            // This will need to be set, so that a cold restore will work.
            self.sleep_state = Some(SleepState::Inactive);
            return Ok(());
        }
        if let Some(queues) = self.device.virtio_sleep()? {
            anyhow::ensure!(
                self.device_activated,
                format!(
                    "unactivated device {} returned queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Active {
                activated_queues: queues,
            });
        } else {
            anyhow::ensure!(
                !self.device_activated,
                format!(
                    "activated device {} didn't return queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Inactive);
        }
        Ok(())
    }

    fn wake(&mut self) -> anyhow::Result<()> {
        // A vhost user device that isn't activated doesn't need to be woken up.
        if !self.device_activated && self.device.is_vhost_user() {
            self.sleep_state = None;
            return Ok(());
        }
        match self.sleep_state.take() {
            None => {
                // If the device is already awake, we should not request it to wake again.
            }
            Some(SleepState::Inactive) => {
                self.device.virtio_wake(None).with_context(|| {
                    format!(
                        "virtio_wake failed for {}, can't recover",
                        self.debug_label(),
                    )
                })?;
            }
            Some(SleepState::Active { activated_queues }) => {
                self.device
                    .virtio_wake(Some((
                        self.mem.clone(),
                        self.interrupt
                            .clone()
                            .expect("interrupt missing for already active queues"),
                        activated_queues,
                    )))
                    .with_context(|| {
                        format!(
                            "virtio_wake failed for {}, can't recover",
                            self.debug_label(),
                        )
                    })?;
            }
        };
        Ok(())
    }

    fn snapshot(&mut self) -> anyhow::Result<serde_json::Value> {
        if self.iommu.is_some() {
            return Err(anyhow!("Cannot snapshot if iommu is present."));
        }

        serde_json::to_value(VirtioPciDeviceSnapshot {
            config_regs: self.config_regs.snapshot()?,
            inner_device: self.device.virtio_snapshot()?,
            device_activated: self.device_activated,
            interrupt: self.interrupt.as_ref().map(|i| i.snapshot()),
            msix_config: self.msix_config.lock().snapshot()?,
            common_config: self.common_config,
            queues: self
                .queues
                .iter()
                .map(|q| q.snapshot())
                .collect::<anyhow::Result<Vec<_>>>()?,
            activated_queues: match &self.sleep_state {
                None => {
                    anyhow::bail!("tried snapshotting while awake")
                }
                Some(SleepState::Inactive) => None,
                Some(SleepState::Active { activated_queues }) => {
                    let mut serialized_queues = Vec::new();
                    for (index, queue) in activated_queues.iter() {
                        serialized_queues.push((*index, queue.snapshot()?));
                    }
                    Some(serialized_queues)
                }
            },
        })
        .context("failed to serialize VirtioPciDeviceSnapshot")
    }

    fn restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
        // Restoring from an activated state is more complex and low priority, so just fail for
        // now. We'll need to reset the device before restoring, e.g. must call
        // self.unregister_ioevents().
        anyhow::ensure!(
            !self.device_activated,
            "tried to restore after virtio device activated. not supported yet"
        );

        let deser: VirtioPciDeviceSnapshot = serde_json::from_value(data)?;

        self.config_regs.restore(deser.config_regs)?;
        self.device_activated = deser.device_activated;

        self.msix_config.lock().restore(deser.msix_config)?;
        self.common_config = deser.common_config;

        assert_eq!(
            self.queues.len(),
            deser.queues.len(),
            "device must have the same number of queues"
        );
        for (q, s) in self.queues.iter_mut().zip(deser.queues.into_iter()) {
            q.restore(s)?;
        }

        // Verify we are asleep and inactive.
        match &self.sleep_state {
            None => {
                anyhow::bail!("tried restoring while awake")
            }
            Some(SleepState::Inactive) => {}
            Some(SleepState::Active { .. }) => {
                anyhow::bail!("tried to restore after virtio device activated. not supported yet")
            }
        };
        // Restore `sleep_state`.
        if let Some(activated_queues_snapshot) = deser.activated_queues {
            let mut activated_queues = BTreeMap::new();
            for (index, queue_snapshot) in activated_queues_snapshot {
                let queue_config = self
                    .queues
                    .get(index)
                    .with_context(|| format!("missing queue config for activated queue {index}"))?;
                let queue_evt = self
                    .queue_evts
                    .get(index)
                    .with_context(|| format!("missing queue event for activated queue {index}"))?
                    .event
                    .try_clone()
                    .context("failed to clone queue event")?;
                activated_queues.insert(
                    index,
                    Queue::restore(queue_config, queue_snapshot, &self.mem, queue_evt)?,
                );
            }

            // Restore the activated queues.
            self.sleep_state = Some(SleepState::Active { activated_queues });
        } else {
            self.sleep_state = Some(SleepState::Inactive);
        }

        // Also replicate the other work in activate: initialize the interrupt and queues
        // events. This could just as easily be done in `wake` instead.
        // NOTE: Needs to be done last in `restore` because it relies on the other VirtioPciDevice
        // fields.
        if let Some(deser_interrupt) = deser.interrupt {
            self.interrupt = Some(Interrupt::new_from_snapshot(
                self.interrupt_evt
                    .as_ref()
                    .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                    .try_clone()
                    .with_context(|| {
                        format!("{} failed to clone interrupt_evt", self.debug_label())
                    })?,
                Some(self.msix_config.clone()),
                self.common_config.msix_config,
                deser_interrupt,
                #[cfg(target_arch = "x86_64")]
                Some(PmWakeupEvent::new(
                    self.vm_control_tube.clone(),
                    self.pm_config.clone(),
                    MetricEventType::VirtioWakeup {
                        virtio_id: self.device.device_type() as u32,
                    },
                )),
            ));
        }

        // Call register_io_events for the activated queue events.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;
        self.queues
            .iter()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .try_for_each(|((queue_index, _queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                Ok::<(), anyhow::Error>(())
            })?;

        // There might be data in the queue that wasn't drained by the device
        // at the time it was snapshotted. In this case, the doorbell should
        // still be signaled. If it is not, the driver may never re-trigger the
        // doorbell, and the device will stall. So here, we explicitly signal
        // every doorbell. Spurious doorbells are safe (devices will check their
        // queue, realize nothing is there, and go back to sleep.)
        self.queue_evts.iter_mut().try_for_each(|queue_event| {
            queue_event
                .event
                .signal()
                .context("failed to wake doorbell")
        })?;

        if self.device.is_vhost_user() {
            let (queue_evts, interrupt) = if self.device_activated {
                (
                    Some(
                        self.queue_evts
                            .iter()
                            .map(|queue_evt| {
                                queue_evt
                                    .event
                                    .try_clone()
                                    .context("Failed to clone queue_evt")
                            })
                            .collect::<anyhow::Result<Vec<_>>>()?,
                    ),
                    Some(
                        self.interrupt
                            .as_ref()
                            .expect("Interrupt should not be empty if device was activated.")
                            .clone(),
                    ),
                )
            } else {
                (None, None)
            };
            self.device.vhost_user_restore(
                deser.inner_device,
                &self.queues,
                queue_evts,
                interrupt,
                self.mem.clone(),
                &self.msix_config,
                self.device_activated,
            )?;
        } else {
            self.device.virtio_restore(deser.inner_device)?;
        }

        Ok(())
    }
}

struct VmRequester {
    vm_memory_client: VmMemoryClient,
    alloc: Alloc,
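    // Offset (within the shmem BAR) of each live mapping -> id of the registered memory region.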
    mappings: BTreeMap<u64, VmMemoryRegionId>,
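    // True until prepare_shared_memory_region has been sent; cleared by the first add_mapping().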
    needs_prepare: bool,
}

impl VmRequester {
    fn new(vm_memory_client: VmMemoryClient, alloc: Alloc, do_prepare: bool) -> Self {
        Self {
            vm_memory_client,
            alloc,
            mappings: BTreeMap::new(),
            needs_prepare: do_prepare,
        }
    }
}

impl SharedMemoryMapper for VmRequester {
    fn add_mapping(
        &mut self,
        source: VmMemorySource,
        offset: u64,
        prot: Protection,
        cache: MemCacheType,
    ) -> anyhow::Result<()> {
        if self.needs_prepare {
            self.vm_memory_client
                .prepare_shared_memory_region(self.alloc, cache)
                .context("prepare_shared_memory_region failed")?;
            self.needs_prepare = false;
        }

        let id = self
            .vm_memory_client
            .register_memory(
                source,
                VmMemoryDestination::ExistingAllocation {
                    allocation: self.alloc,
                    offset,
                },
                prot,
                cache,
            )
            .context("register_memory failed")?;

        self.mappings.insert(offset, id);
        Ok(())
    }

    fn remove_mapping(&mut self, offset: u64) -> anyhow::Result<()> {
        let id = self.mappings.remove(&offset).context("invalid offset")?;
        self.vm_memory_client
            .unregister_memory(id)
            .context("unregister_memory failed")
    }

    fn as_raw_descriptor(&self) -> Option<RawDescriptor> {
        Some(self.vm_memory_client.as_raw_descriptor())
    }
}

#[cfg(test)]
mod tests {

    #[cfg(feature = "pci-hotplug")]
    #[test]
    fn allocate_aligned_address() {
        let mut simple_allocator = super::SimpleAllocator::new(0);
        // start at 0, aligned to 0x80. Interval end at 0x20.
        assert_eq!(simple_allocator.alloc(0x20, 0x80).unwrap(), 0);
        // 0x20 => start at 0x40. Interval end at 0x80.
        assert_eq!(simple_allocator.alloc(0x40, 0x40).unwrap(), 0x40);
        // 0x80 => start at 0x80, Interval end at 0x108.
        assert_eq!(simple_allocator.alloc(0x88, 0x80).unwrap(), 0x80);
        // 0x108 => start at 0x180. Interval end at 0x1b0.
        assert_eq!(simple_allocator.alloc(0x30, 0x80).unwrap(), 0x180);
    }
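
    // Added illustrative check (not from the original test suite): the virtio
    // 1.x spec fixes the vendor capability layouts at 16 bytes for
    // `virtio_pci_cap`, 20 for the notify variant, and 24 for the 64-bit
    // shared-memory variant; `#[repr(C)]` above is expected to reproduce that.
    #[test]
    fn virtio_pci_cap_sizes() {
        assert_eq!(std::mem::size_of::<super::VirtioPciCap>(), 16);
        assert_eq!(std::mem::size_of::<super::VirtioPciNotifyCap>(), 20);
        assert_eq!(std::mem::size_of::<super::VirtioPciShmCap>(), 24);
    }

    // Added sketch of the math behind the write_bar() notification fallback:
    // doorbells sit NOTIFY_OFF_MULTIPLIER bytes apart, so an offset within the
    // notification region maps back to its queue index by division.
    #[test]
    fn notification_offset_maps_to_queue_index() {
        let offset = super::NOTIFICATION_BAR_OFFSET + 3 * u64::from(super::NOTIFY_OFF_MULTIPLIER);
        let queue_index =
            (offset - super::NOTIFICATION_BAR_OFFSET) / u64::from(super::NOTIFY_OFF_MULTIPLIER);
        assert_eq!(queue_index, 3);
    }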
}