1 // Copyright 2021 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! Implement a userspace PCI device driver for the virtio vhost-user device.
6
7 use std::str::FromStr;
8 use std::sync::Arc;
9 use std::time::Duration;
10 use std::time::Instant;
11
12 use anyhow::anyhow;
13 use anyhow::bail;
14 use anyhow::Context;
15 use anyhow::Result;
16 use base::info;
17 use base::Event;
18 use base::MemoryMapping;
19 use base::MemoryMappingBuilder;
20 use memoffset::offset_of;
21 use resources::Alloc;
22 use vfio_sys::*;
23 use virtio_sys::virtio_config;
24 use virtio_sys::virtio_config::VIRTIO_F_VERSION_1;
25 use zerocopy::AsBytes;
26 use zerocopy::FromBytes;
27
28 use crate::pci::MsixCap;
29 use crate::pci::PciAddress;
30 use crate::pci::PciCapabilityID;
31 use crate::pci::CAPABILITY_LIST_HEAD_OFFSET;
32 use crate::vfio::VfioDevice;
33 use crate::vfio::VfioPciConfig;
34 use crate::vfio::VfioRegionAddr;
35 use crate::virtio::vhost::user::device::vvu::bus::open_vfio_device;
36 use crate::virtio::vhost::user::device::vvu::queue::DescTableAddrs;
37 use crate::virtio::vhost::user::device::vvu::queue::IovaAllocator;
38 use crate::virtio::vhost::user::device::vvu::queue::UserQueue;
39 use crate::virtio::PciCapabilityType;
40 use crate::virtio::VirtioPciCap;
41
/// Value written to the `device_status` register to request a device reset. `set_status` writes
/// it verbatim instead of OR-ing it into the current status.
const VIRTIO_CONFIG_STATUS_RESET: u8 = 0;
43
get_pci_cap_addr(cap: &VirtioPciCap) -> Result<VfioRegionAddr>44 fn get_pci_cap_addr(cap: &VirtioPciCap) -> Result<VfioRegionAddr> {
45 const PCI_MAX_RESOURCE: u8 = 6;
46
47 if cap.bar >= PCI_MAX_RESOURCE {
48 bail!("invalid bar: {:?} >= {}", cap.bar, PCI_MAX_RESOURCE);
49 }
50
51 if u32::from(cap.offset)
52 .checked_add(u32::from(cap.length))
53 .is_none()
54 {
55 bail!("overflow: {:?} + {:?}", cap.offset, cap.length);
56 }
57
58 Ok(VfioRegionAddr {
59 index: cap.bar.into(),
60 addr: u32::from(cap.offset) as u64,
61 })
62 }
63
64 #[repr(C)]
65 #[derive(Debug, Default, Copy, Clone, AsBytes, FromBytes)]
66 /// VirtIO spec: 4.1.4.3 Common configuration structure layout
67 struct virtio_pci_common_cfg {
68 // For the whole device.
69 device_feature_select: u32,
70 device_feature: u32,
71 guest_feature_select: u32,
72 guest_feature: u32,
73 msix_config: u16,
74 num_queues: u16,
75 device_status: u8,
76 config_generation: u8,
77
78 // For a specific virtqueue.
79 queue_select: u16,
80 queue_size: u16,
81 queue_msix_vector: u16,
82 queue_enable: u16,
83 queue_notify_off: u16,
84 queue_desc_lo: u32,
85 queue_desc_hi: u32,
86 queue_avail_lo: u32,
87 queue_avail_hi: u32,
88 queue_used_lo: u32,
89 queue_used_hi: u32,
90 }
91
92 #[repr(C)]
93 #[derive(Debug, Default, Copy, Clone, AsBytes, FromBytes)]
94 struct virtio_pci_notification_cfg {
95 notification_select: u16,
96 notification_msix_vector: u16,
97 }
98
99 #[derive(Clone)]
100 pub struct VvuPciCaps {
101 msix_table_size: u16,
102 common_cfg_addr: VfioRegionAddr,
103 notify_off_multiplier: u32,
104 notify_base_addr: VfioRegionAddr,
105 dev_cfg_addr: VfioRegionAddr,
106 isr_addr: VfioRegionAddr,
107 doorbell_off_multiplier: u32,
108 doorbell_base_addr: VfioRegionAddr,
109 notify_cfg_addr: VfioRegionAddr,
110 shared_mem_cfg_addr: VfioRegionAddr,
111 }
112
113 impl VvuPciCaps {
new(config: &VfioPciConfig) -> Result<Self>114 pub fn new(config: &VfioPciConfig) -> Result<Self> {
115 // Safe because zero is valid for every field in `VvuPciCaps`.
116 let mut caps: Self = unsafe { std::mem::zeroed() };
117
118 // Read PCI capability config one by one and set up each of them.
119 let mut pos: u8 = config.read_config(CAPABILITY_LIST_HEAD_OFFSET as u32);
120 while pos != 0 {
121 let cfg: [u8; 2] = config.read_config(pos.into());
122 let (cap_id, cap_next) = (cfg[0], cfg[1]);
123
124 if cap_id == PciCapabilityID::Msix as u8 {
125 let cap = config.read_config::<MsixCap>(pos.into());
126 // According to PCI 3.0 specification section 6.8.2.3 ("Message Control for MSI-X"),
127 // MSI-X Table Size N, which is encoded as N-1.
128 caps.msix_table_size = cap.msg_ctl().get_table_size() + 1;
129 }
130
131 if cap_id != PciCapabilityID::VendorSpecific as u8 {
132 pos = cap_next;
133 continue;
134 }
135
136 let cap: VirtioPciCap = config.read_config(pos.into());
137
138 let cfg = PciCapabilityType::n(cap.cfg_type)
139 .ok_or_else(|| anyhow!("invalid cfg_type: {}", cap.cfg_type))?;
140 match cfg {
141 PciCapabilityType::CommonConfig => {
142 caps.common_cfg_addr = get_pci_cap_addr(&cap)?;
143 }
144 PciCapabilityType::NotifyConfig => {
145 caps.notify_off_multiplier =
146 config.read_config(pos as u32 + std::mem::size_of::<VirtioPciCap>() as u32);
147 caps.notify_base_addr = get_pci_cap_addr(&cap)?;
148 }
149 PciCapabilityType::IsrConfig => {
150 caps.isr_addr = get_pci_cap_addr(&cap)?;
151 }
152 PciCapabilityType::DeviceConfig => {
153 caps.dev_cfg_addr = get_pci_cap_addr(&cap)?;
154 }
155 PciCapabilityType::PciConfig => {
156 // do nothing
157 }
158 PciCapabilityType::DoorbellConfig => {
159 caps.doorbell_off_multiplier =
160 config.read_config(pos as u32 + std::mem::size_of::<VirtioPciCap>() as u32);
161 caps.doorbell_base_addr = get_pci_cap_addr(&cap)?;
162 }
163 PciCapabilityType::NotificationConfig => {
164 caps.notify_cfg_addr = get_pci_cap_addr(&cap)?;
165 }
166 PciCapabilityType::SharedMemoryConfig => {
167 caps.shared_mem_cfg_addr = get_pci_cap_addr(&cap)?;
168 }
169 }
170
171 pos = cap.cap_next;
172 }
173
174 Ok(caps)
175 }
176
doorbell_off_multiplier(&self) -> u32177 pub fn doorbell_off_multiplier(&self) -> u32 {
178 self.doorbell_off_multiplier
179 }
180
doorbell_base_addr(&self) -> &VfioRegionAddr181 pub fn doorbell_base_addr(&self) -> &VfioRegionAddr {
182 &self.doorbell_base_addr
183 }
184
shared_mem_cfg_addr(&self) -> &VfioRegionAddr185 pub fn shared_mem_cfg_addr(&self) -> &VfioRegionAddr {
186 &self.shared_mem_cfg_addr
187 }
188 }
189
/// Writes `$value` to the `$member` field of the device's common configuration structure.
///
/// `$dev` must expose `vfio_dev` and `caps` (e.g. a `VvuPciDevice`). The number of bytes written
/// is determined by the type of `$value`, so it should match the field's type.
macro_rules! write_common_cfg_field {
    ($dev:expr, $member:ident, $value:expr) => {
        $dev.vfio_dev.region_write_to_addr(
            &$value,
            &$dev.caps.common_cfg_addr,
            offset_of!(virtio_pci_common_cfg, $member) as u64,
        )
    };
}
199
/// Reads the `$member` field of the device's common configuration structure.
///
/// `$dev` must expose `vfio_dev` and `caps` (e.g. a `VvuPciDevice`). The result type is inferred
/// at the call site, so callers annotate the binding with the field's type.
macro_rules! read_common_cfg_field {
    ($dev:expr, $member:ident) => {
        $dev.vfio_dev.region_read_from_addr(
            &$dev.caps.common_cfg_addr,
            offset_of!(virtio_pci_common_cfg, $member) as u64,
        )
    };
}
208
/// Writes `$value` to the `$member` field of the notification configuration structure through
/// the memory mapping `$mapping`.
///
/// The target offset is the notification structure's address from `$dev.caps` plus the field's
/// offset. Evaluates to the `Result` of the volatile write.
macro_rules! write_notify_cfg_field {
    ($dev:expr, $mapping:expr, $member:ident, $value:expr) => {
        $mapping.write_obj_volatile(
            $value,
            $dev.caps.notify_cfg_addr.addr as usize
                + offset_of!(virtio_pci_notification_cfg, $member),
        )
    };
}
218
/// Reads the `$member` field of the notification configuration structure through the memory
/// mapping `$mapping`.
///
/// The source offset is the notification structure's address from `$dev.caps` plus the field's
/// offset. Evaluates to the `Result` of the volatile read; the value type is inferred at the
/// call site.
macro_rules! read_notify_cfg_field {
    ($dev:expr, $mapping:expr, $member:ident) => {
        $mapping.read_obj_volatile(
            $dev.caps.notify_cfg_addr.addr as usize
                + offset_of!(virtio_pci_notification_cfg, $member),
        )
    };
}
227
228 /// A VVU notification resource which works as an interrupt for a virtqueue.
229 pub struct QueueNotifier {
230 addr: u64,
231 mmap: MemoryMapping,
232 }
233
234 impl QueueNotifier {
235 /// Initialize a new QueueNotifier structure given the queue index, the vfio
236 /// device, and the VvuPciCaps.
new( queue_type: QueueType, device: &Arc<VfioDevice>, caps: &VvuPciCaps, ) -> Result<QueueNotifier>237 pub fn new(
238 queue_type: QueueType,
239 device: &Arc<VfioDevice>,
240 caps: &VvuPciCaps,
241 ) -> Result<QueueNotifier> {
242 let addr =
243 caps.notify_base_addr.addr + (queue_type as u64 * caps.notify_off_multiplier as u64);
244 let mmap_region = device.get_region_mmap(caps.notify_base_addr.index);
245 let region_offset = device.get_region_offset(caps.notify_base_addr.index);
246 let offset = region_offset + mmap_region[0].offset;
247
248 let mmap = MemoryMappingBuilder::new(mmap_region[0].size as usize)
249 .from_file(device.device_file())
250 .offset(offset)
251 .build()?;
252
253 Ok(QueueNotifier { addr, mmap })
254 }
255
notify(&self)256 pub fn notify(&self) {
257 // It's okay to not handle a failure here because if this fails we cannot recover
258 // anyway. The mmap address should be correct as initialized in the 'new()' function
259 // according to the given vfio device.
260 self.mmap
261 .write_obj_volatile(0_u8, self.addr as usize)
262 .expect("unable to write to mmap area");
263 }
264 }
265
266 pub struct VvuPciDevice {
267 pub vfio_dev: Arc<VfioDevice>,
268 pub caps: VvuPciCaps,
269 pub queues: Vec<UserQueue>,
270 pub queue_notifiers: Vec<QueueNotifier>,
271 pub irqs: Vec<Event>,
272 pub notification_evts: Vec<Event>,
273 }
274
/// The two VVU virtqueues. Each discriminant is the index of the corresponding queue.
#[derive(Clone, Copy, Debug)]
pub enum QueueType {
    /// Queue index 0; created as device-writable.
    Rx = 0,
    /// Queue index 1; created as not device-writable.
    Tx = 1,
}
280
281 impl VvuPciDevice {
282 /// Creates a driver for virtio-vhost-user PCI device from a PCI address.
283 ///
284 /// # Arguments
285 ///
286 /// * `pci_id` - PCI device ID such as `"0000:00:05.0"`. An error will be returned if this is
287 /// not a valid PCI device ID string.
288 /// * `device_vq_num` - number of virtqueues that the device backend (e.g. block) may use.
new(pci_id: &str, device_vq_num: usize) -> Result<Self>289 pub fn new(pci_id: &str, device_vq_num: usize) -> Result<Self> {
290 Self::new_from_address(
291 PciAddress::from_str(pci_id).context("failed to parse PCI address")?,
292 device_vq_num,
293 )
294 }
295
296 /// Creates a driver for virtio-vhost-user PCI device from a string containing a PCI address.
297 ///
298 /// # Arguments
299 ///
300 /// * `pci_address` - PCI device address.
301 /// * `device_vq_num` - number of virtqueues that the device backend (e.g. block) may use.
new_from_address(pci_address: PciAddress, device_vq_num: usize) -> Result<Self>302 pub fn new_from_address(pci_address: PciAddress, device_vq_num: usize) -> Result<Self> {
303 let vfio_path = format!("/sys/bus/pci/devices/{}", pci_address);
304 let vfio_dev = Arc::new(open_vfio_device(&vfio_path)?);
305 let config = VfioPciConfig::new(vfio_dev.clone());
306 let caps = VvuPciCaps::new(&config)?;
307
308 let page_mask = vfio_dev
309 .vfio_get_iommu_page_size_mask()
310 .context("failed to get iommu page size mask")?;
311 if page_mask & (base::pagesize() as u64) == 0 {
312 bail!("Unsupported iommu page mask {:x}", page_mask);
313 }
314
315 let mut pci_dev = Self {
316 vfio_dev,
317 caps,
318 queues: vec![],
319 queue_notifiers: vec![],
320 irqs: vec![],
321 notification_evts: vec![],
322 };
323
324 config.set_bus_master();
325 pci_dev.init(device_vq_num)?;
326
327 Ok(pci_dev)
328 }
329
set_status(&self, status: u8)330 fn set_status(&self, status: u8) {
331 let new_status = if status == VIRTIO_CONFIG_STATUS_RESET {
332 VIRTIO_CONFIG_STATUS_RESET
333 } else {
334 let cur_status: u8 = read_common_cfg_field!(self, device_status);
335 status | cur_status
336 };
337
338 write_common_cfg_field!(self, device_status, new_status);
339 }
340
get_device_feature(&self) -> u64341 fn get_device_feature(&self) -> u64 {
342 write_common_cfg_field!(self, device_feature_select, 0);
343 let lower: u32 = read_common_cfg_field!(self, device_feature);
344 write_common_cfg_field!(self, device_feature_select, 1);
345 let upper: u32 = read_common_cfg_field!(self, device_feature);
346
347 lower as u64 | ((upper as u64) << 32)
348 }
349
set_guest_feature(&self, features: u64)350 fn set_guest_feature(&self, features: u64) {
351 let lower: u32 = (features & (u32::MAX as u64)) as u32;
352 let upper: u32 = (features >> 32) as u32;
353 write_common_cfg_field!(self, guest_feature_select, 0);
354 write_common_cfg_field!(self, guest_feature, lower);
355 write_common_cfg_field!(self, guest_feature_select, 1);
356 write_common_cfg_field!(self, guest_feature, upper);
357 }
358
359 /// Creates the VVU's virtqueue (i.e. rxq or txq).
create_queue(&self, typ: QueueType) -> Result<(UserQueue, QueueNotifier)>360 fn create_queue(&self, typ: QueueType) -> Result<(UserQueue, QueueNotifier)> {
361 write_common_cfg_field!(self, queue_select, typ as u16);
362
363 let queue_size: u16 = read_common_cfg_field!(self, queue_size);
364 if queue_size == 0 {
365 bail!("queue_size for {:?} queue is 0", typ);
366 }
367
368 let device_writable = match typ {
369 QueueType::Rx => true,
370 QueueType::Tx => false,
371 };
372 let queue = UserQueue::new(queue_size, device_writable, typ as u8, self)?;
373 let DescTableAddrs { desc, avail, used } = queue.desc_table_addrs()?;
374
375 let desc_lo = (desc & 0xffffffff) as u32;
376 let desc_hi = (desc >> 32) as u32;
377 write_common_cfg_field!(self, queue_desc_lo, desc_lo);
378 write_common_cfg_field!(self, queue_desc_hi, desc_hi);
379
380 let avail_lo = (avail & 0xffffffff) as u32;
381 let avail_hi = (avail >> 32) as u32;
382 write_common_cfg_field!(self, queue_avail_lo, avail_lo);
383 write_common_cfg_field!(self, queue_avail_hi, avail_hi);
384
385 let used_lo = (used & 0xffffffff) as u32;
386 let used_hi = (used >> 32) as u32;
387 write_common_cfg_field!(self, queue_used_lo, used_lo);
388 write_common_cfg_field!(self, queue_used_hi, used_hi);
389
390 let notify_off: u16 = read_common_cfg_field!(self, queue_notify_off);
391 let mut notify_addr = self.caps.notify_base_addr.clone();
392 notify_addr.addr += notify_off as u64 * self.caps.notify_off_multiplier as u64;
393 let notifier = QueueNotifier::new(typ, &self.vfio_dev, &self.caps)?;
394
395 Ok((queue, notifier))
396 }
397
398 /// Creates the VVU's rxq and txq.
create_queues(&self) -> Result<(Vec<UserQueue>, Vec<QueueNotifier>)>399 fn create_queues(&self) -> Result<(Vec<UserQueue>, Vec<QueueNotifier>)> {
400 let (rxq, rxq_notifier) = self.create_queue(QueueType::Rx)?;
401 rxq_notifier.notify();
402
403 let (txq, txq_notifier) = self.create_queue(QueueType::Tx)?;
404 txq_notifier.notify();
405
406 Ok((vec![rxq, txq], vec![rxq_notifier, txq_notifier]))
407 }
408
409 /// Creates two sets of interrupts events; ones for the VVU virtqueues (i.e. rxq and txq) and
410 /// ones for the device virtqueues.
411 ///
412 /// # Arguments
413 /// * `device_vq_num` - the number of queues for the device.
create_irqs(&self, device_vq_num: usize) -> Result<(Vec<Event>, Vec<Event>)>414 fn create_irqs(&self, device_vq_num: usize) -> Result<(Vec<Event>, Vec<Event>)> {
415 const VIRTIO_MSI_NO_VECTOR: u16 = 0xffff;
416
417 // Sets msix_config
418 write_common_cfg_field!(self, msix_config, 0u16);
419 let v: u16 = read_common_cfg_field!(self, msix_config);
420 if v == VIRTIO_MSI_NO_VECTOR {
421 bail!("failed to set config vector: {}", v);
422 }
423
424 // Creates events for the interrupts of vvu's rxq and txq.
425 let vvu_irqs = vec![
426 Event::new().context("failed to create event")?,
427 Event::new().context("failed to create event")?,
428 ];
429
430 // Create events for the device virtqueue interrupts.
431 let mut notification_evts = Vec::with_capacity(device_vq_num);
432 for _ in 0..device_vq_num {
433 notification_evts.push(Event::new().context("failed to create event")?);
434 }
435
436 let msix_num = 2 + device_vq_num;
437 if msix_num > usize::from(self.caps.msix_table_size) {
438 bail!(
439 "{} MSI-X vector is required but only {} are available.",
440 msix_num,
441 self.caps.msix_table_size
442 );
443 }
444
445 let mut msix_vec = Vec::with_capacity(msix_num);
446 msix_vec.push(Some(&vvu_irqs[0]));
447 msix_vec.push(Some(&vvu_irqs[1]));
448 msix_vec.extend(notification_evts.iter().take(device_vq_num).map(Some));
449
450 self.vfio_dev
451 .irq_enable(&msix_vec, VFIO_PCI_MSIX_IRQ_INDEX, 0)
452 .map_err(|e| anyhow!("failed to enable irq: {}", e))?;
453
454 // Registers VVU virtqueue's irqs by writing `queue_msix_vector`.
455 for index in 0..self.queues.len() {
456 write_common_cfg_field!(self, queue_select, index as u16);
457 write_common_cfg_field!(self, queue_msix_vector, index as u16);
458 let v: u16 = read_common_cfg_field!(self, queue_msix_vector);
459 if v == VIRTIO_MSI_NO_VECTOR {
460 bail!("failed to set vector {} to {}-th vvu virtqueue", v, index);
461 }
462 }
463
464 let mmap_region = self
465 .vfio_dev
466 .get_region_mmap(self.caps.notify_cfg_addr.index);
467 let region_offset = self
468 .vfio_dev
469 .get_region_offset(self.caps.notify_cfg_addr.index);
470 let offset = region_offset + mmap_region[0].offset;
471
472 let mmap = MemoryMappingBuilder::new(mmap_region[0].size as usize)
473 .from_file(self.vfio_dev.device_file())
474 .offset(offset)
475 .build()?;
476
477 // Registers the device virtqueus's irqs by writing `notification_msix_vector`.
478 for i in 0..device_vq_num as u16 {
479 let msix_vector = self.queues.len() as u16 + i;
480
481 write_notify_cfg_field!(self, mmap, notification_select, i)
482 .expect("failed to write select");
483 let select: u16 = read_notify_cfg_field!(self, mmap, notification_select)
484 .expect("failed to verify select");
485 if select != i {
486 bail!("failed to select {}-th notification", i);
487 }
488
489 write_notify_cfg_field!(self, mmap, notification_msix_vector, msix_vector)
490 .expect("failed to write vector");
491 let vector: u16 = read_notify_cfg_field!(self, mmap, notification_msix_vector)
492 .expect("failed to verify vector");
493 if msix_vector != vector {
494 bail!(
495 "failed to set vector {} to {}-th notification",
496 msix_vector,
497 i
498 );
499 }
500 }
501
502 Ok((vvu_irqs, notification_evts))
503 }
504
init(&mut self, device_vq_num: usize) -> Result<()>505 fn init(&mut self, device_vq_num: usize) -> Result<()> {
506 self.set_status(VIRTIO_CONFIG_STATUS_RESET as u8);
507 // Wait until reset is done with timeout.
508 let deadline = Instant::now() + Duration::from_secs(1);
509 loop {
510 let cur_status: u8 = read_common_cfg_field!(self, device_status);
511 if cur_status == 0 {
512 break;
513 }
514 if Instant::now() < deadline {
515 std::thread::sleep(Duration::from_millis(10));
516 } else {
517 bail!("device initialization didn't finish within the time limit");
518 }
519 }
520
521 self.set_status(
522 (virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE | virtio_config::VIRTIO_CONFIG_S_DRIVER)
523 as u8,
524 );
525
526 // TODO(b/207364742): Support VIRTIO_RING_F_EVENT_IDX.
527 let required_features = 1u64 << VIRTIO_F_VERSION_1;
528 let enabled_features = self.get_device_feature();
529 if (required_features & enabled_features) != required_features {
530 bail!(
531 "required feature set is 0x{:x} but 0x{:x} is enabled",
532 required_features,
533 enabled_features
534 );
535 };
536 self.set_guest_feature(required_features);
537 self.set_status(virtio_config::VIRTIO_CONFIG_S_FEATURES_OK as u8);
538
539 // Initialize Virtqueues
540 let (queues, queue_notifiers) = self.create_queues()?;
541 self.queues = queues;
542 self.queue_notifiers = queue_notifiers;
543
544 let (irqs, notification_evts) = self.create_irqs(device_vq_num)?;
545 self.irqs = irqs;
546 self.notification_evts = notification_evts;
547
548 // Enable Virtqueues
549 for index in 0..self.queues.len() {
550 write_common_cfg_field!(self, queue_select, index as u16);
551 write_common_cfg_field!(self, queue_enable, 1_u16);
552 }
553
554 self.set_status(virtio_config::VIRTIO_CONFIG_S_DRIVER_OK as u8);
555
556 Ok(())
557 }
558
start(&self) -> Result<()>559 pub fn start(&self) -> Result<()> {
560 const STATUS_OFFSET: u64 = 0;
561 const VIRTIO_VHOST_USER_STATUS_SLAVE_UP: usize = 0;
562 let mut status: u32 = self
563 .vfio_dev
564 .region_read_from_addr(&self.caps.dev_cfg_addr, STATUS_OFFSET);
565
566 status |= 1u32 << VIRTIO_VHOST_USER_STATUS_SLAVE_UP;
567
568 self.vfio_dev
569 .region_write_to_addr(&status, &self.caps.dev_cfg_addr, STATUS_OFFSET);
570
571 info!("vvu device started");
572 Ok(())
573 }
574 }
575
576 impl IovaAllocator for VvuPciDevice {
alloc_iova(&self, size: u64, tag: u8) -> Result<u64>577 fn alloc_iova(&self, size: u64, tag: u8) -> Result<u64> {
578 self.vfio_dev
579 .alloc_iova(size, base::pagesize() as u64, Alloc::VvuQueue(tag))
580 .context("failed to find an iova region to map the gpa region to")
581 }
582
map_iova(&self, iova: u64, size: u64, addr: *const u8) -> Result<()>583 unsafe fn map_iova(&self, iova: u64, size: u64, addr: *const u8) -> Result<()> {
584 self.vfio_dev
585 .vfio_dma_map(iova, size, addr as u64, true)
586 .context("failed to map iova")
587 }
588 }
589