// Copyright 2021 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Implement the driver side of virtio queue handling.
//! The virtqueue struct is expected to be used in userspace VFIO virtio drivers.

use std::mem;
use std::num::Wrapping;
use std::sync::atomic::{fence, Ordering};

use anyhow::{anyhow, bail, Context, Result};
use data_model::{DataInit, Le16, Le32, Le64, VolatileSlice};
use virtio_sys::virtio_ring::VRING_DESC_F_WRITE;
use vm_memory::{GuestAddress as IOVA, GuestMemory as QueueMemory};

use crate::virtio::Desc;

#[derive(Copy, Clone, Debug)]
#[repr(C)]
struct UsedElem {
    id: Le32,
    len: Le32,
}
// Safe as there are no implicit offsets.
unsafe impl DataInit for UsedElem {}

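/// Size in bytes of each buffer in the virtqueue memory region; every descriptor points at one
/// fixed-size buffer of this length.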
const BUF_SIZE: u64 = 1024;

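/// IOVAs of the descriptor table, available ring, and used ring, as exposed to the vvu device.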
pub struct DescTableAddrs {
    pub desc: u64,
    pub avail: u64,
    pub used: u64,
}

struct MemLayout {
    /// Address of the descriptor table in UserQueue.mem.
    desc_table: IOVA,

    /// Address of the available ring in UserQueue.mem.
    avail_ring: IOVA,

    /// Address of the used ring in UserQueue.mem.
    used_ring: IOVA,

    /// Address of the start of buffers in UserQueue.mem.
    buffer_addr: IOVA,
}

/// Represents a virtqueue that is allocated in the guest userspace and manipulated from a VFIO
/// driver.
///
/// This struct is similar to `devices::virtio::Queue`, which is designed for virtio devices, while
/// this struct is defined for virtio drivers.
///
/// # Memory Layout
///
/// `mem` is the memory allocated in the guest userspace for the virtqueue, which is mapped into
/// the vvu device via VFIO. The GuestAddresses of `mem` are the IOVAs that should be used when
/// communicating with the vvu device. All accesses to the shared memory from the device backend
/// must be done through the GuestMemory read/write functions.
///
/// The layout of `mem` is defined in the following table and stored in `mem_layout`.
///
/// |                  | Alignment     | Size                         |
/// |-----------------------------------------------------------------|
/// | Descriptor Table | 16            | 16 ∗ (Queue Size)            |
/// | Available Ring   | 2             | 6 + 2 ∗ (Queue Size)         |
/// | Used Ring        | 4             | 6 + 8 ∗ (Queue Size)         |
/// | Buffers          | (Buffer Size) | (Buffer Size) * (Queue Size) |
/// -------------------------------------------------------------------
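///
/// For example, with a queue size of 4 and the 1 KiB `BUF_SIZE` used here, the regions start at
/// offsets 0 (descriptor table, 64 bytes), 64 (available ring, 14 bytes), 80 (used ring, 38
/// bytes), and 1024 (buffers, 4 KiB) from the allocated IOVA base.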
///
/// TODO(b/207364742): Once we support `VIRTIO_F_EVENT_IDX`, the additional 2 bytes for the
/// `used_event` field will be added.
/// TODO(b/215153367): Use `crate::virtio::Queue` as an underlying data structure so that we can use
/// `descriptor_utils::{Reader, Writer}` instead of having our own read/write methods.
/// One of the biggest blockers is that `virtio::Queue` is designed for the device-side virtqueue,
/// where readable/writable areas are inverted from our use case.
pub struct UserQueue {
    /// The queue size.
    size: Wrapping<u16>,

    /// The underlying memory.
    mem: QueueMemory,

    /// Virtqueue layout on `mem`.
    mem_layout: MemLayout,

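    /// Index of the next available-ring slot to use; its value is written to the ring's `idx`
    /// field by `update_avail_index`.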
    avail_idx: Wrapping<u16>,

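    /// Number of used-ring entries the driver has consumed so far; equivalently, the next
    /// used-ring index to read.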
    used_count: Wrapping<u16>,
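    /// Number of descriptors currently free for the driver to submit.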
    free_count: Wrapping<u16>,

    /// Whether buffers are device-writable or device-readable.
    /// If true, every descriptor has the VIRTQ_DESC_F_WRITE flag.
    /// TODO(b/215153358, b/215153367): Since VIRTQ_DESC_F_WRITE is a per-descriptor flag, this
    /// design is specific to the current vvu specification draft, where a device-writable queue
    /// and a device-readable queue are separated.
    /// Ideally, we should update the vvu spec to use both device-{readable, writable} buffers in
    /// one virtqueue. Also, it's better to use `crate::virtio::DescriptorChain` for descriptors as
    /// a part of b/215153367.
    device_writable: bool,
}

/// Interface used by UserQueue to interact with the IOMMU.
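///
/// A minimal sketch of the intended call sequence (mirroring `UserQueue::init_memory`), where
/// `size`, `tag`, and `host_addr` are placeholder values:
///
/// ```ignore
/// let iova = iova_alloc.alloc_iova(size, tag)?;
/// // Safety: `host_addr` points to at least `size` bytes that are not directly managed by Rust.
/// unsafe { iova_alloc.map_iova(iova, size, host_addr)? };
/// ```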
pub trait IovaAllocator {
    /// Allocates an IO virtual address region of the requested size.
    fn alloc_iova(&self, size: u64, tag: u8) -> Result<u64>;
    /// Maps the given address at the given IOVA.
    ///
    /// # Safety
    ///
    /// `addr` must reference a region of at least length `size`. Memory passed
    /// to this function may be mutated at any time, so `addr` must not be memory
    /// that is directly managed by Rust.
    unsafe fn map_iova(&self, iova: u64, size: u64, addr: *const u8) -> Result<()>;
}

impl UserQueue {
    /// Creates a `UserQueue` instance.
    pub fn new<I>(queue_size: u16, device_writable: bool, tag: u8, iova_alloc: &I) -> Result<Self>
    where
        I: IovaAllocator,
    {
        let (mem, size, mem_layout) = Self::init_memory(queue_size, tag, iova_alloc)?;

        let mut queue = Self {
            mem,
            size: Wrapping(size),
            mem_layout,
            avail_idx: Wrapping(0),
            used_count: Wrapping(0),
            free_count: Wrapping(size),
            device_writable,
        };

        queue.init_descriptor_table()?;

        Ok(queue)
    }

    /// Allocates a memory region and returns the addresses within it for (`desc_table`,
    /// `avail_ring`, `used_ring`, `buffer`).
    fn init_memory<I>(
        max_queue_size: u16,
        tag: u8,
        iova_alloc: &I,
    ) -> Result<(QueueMemory, u16, MemLayout)>
    where
        I: IovaAllocator,
    {
        // Cap the queue size; since vhost-user negotiation finishes within ~20 messages, even a
        // small queue is large enough.
        const MAX_QUEUE_SIZE: u16 = 256;

        let queue_size = std::cmp::min(MAX_QUEUE_SIZE, max_queue_size);
        if queue_size == 0 || !queue_size.is_power_of_two() {
            bail!(
                "queue_size must be a positive power of 2, but got {}",
                queue_size
            );
        }

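        // Rounds `n` up to the nearest multiple of `m`.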
        fn align(n: u64, m: u64) -> u64 {
            ((n + m - 1) / m) * m
        }

        let desc_table = IOVA(0);
        let desc_size = 16u64 * u64::from(queue_size);
        let desc_end = desc_table.0 + desc_size;

        let avail_ring = IOVA(align(desc_end, 2));
        let avail_size = 6 + 2 * u64::from(queue_size);
        let avail_end = avail_ring.0 + avail_size;

        let used_ring = IOVA(align(avail_end, 4));
        let used_size = 6 + 8 * u64::from(queue_size);
        let used_end = used_ring.0 + used_size;

        let buffer_addr = IOVA(align(used_end, BUF_SIZE));
        let buffer_size = BUF_SIZE * u64::from(queue_size);

        let mem_size = align(buffer_addr.0 + buffer_size, base::pagesize() as u64);
        let iova_start = iova_alloc
            .alloc_iova(mem_size, tag)
            .context("failed to allocate queue iova")?;

        let mem = QueueMemory::new(&[(IOVA(iova_start), mem_size)])
            .map_err(|e| anyhow!("failed to create QueueMemory for virtqueue: {}", e))?;

        let host_addr = mem
            .get_host_address_range(IOVA(iova_start), mem_size as usize)
            .context("failed to get host address")?;
        // Safe because the region being mapped is managed via the GuestMemory interface.
        unsafe {
            iova_alloc
                .map_iova(iova_start, mem_size, host_addr)
                .context("failed to map queue")?;
        }

        let mem_layout = MemLayout {
            desc_table: desc_table.unchecked_add(iova_start),
            avail_ring: avail_ring.unchecked_add(iova_start),
            used_ring: used_ring.unchecked_add(iova_start),
            buffer_addr: buffer_addr.unchecked_add(iova_start),
        };

        Ok((mem, queue_size, mem_layout))
    }

    /// Initializes the descriptor table.
    fn init_descriptor_table(&mut self) -> Result<()> {
        let flags = if self.device_writable {
            Le16::from(VRING_DESC_F_WRITE as u16)
        } else {
            Le16::from(0)
        };
        let len = Le32::from(BUF_SIZE as u32);
        let next = Le16::from(0);

        // Register pre-allocated buffers to the descriptor area.
        for i in 0..self.size.0 {
            let idx = Wrapping(i);
            let iova = self.buffer_address(idx)?.offset();
            let desc = Desc {
                addr: iova.into(),
                len,
                flags,
                next,
            };
            self.write_desc_entry(idx, desc)
                .map_err(|e| anyhow!("failed to write {}-th desc: {}", idx, e))?;

            fence(Ordering::SeqCst);
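            // The available ring is laid out as flags (u16), idx (u16), and then `queue_size` u16
            // entries, so the i-th entry lives at byte offset 4 + 2 * i.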
            self.mem
                .write_obj_at_addr(
                    idx.0,
                    self.mem_layout
                        .avail_ring
                        .unchecked_add(u64::from(4 + 2 * i)),
                )
                .context("failed to write avail ring")?;
        }

        // If all of `self`'s buffers are device-writable, expose them to the device.
        if self.device_writable {
            for _ in 0..self.size.0 {
                // TODO(keiichiw): avail_idx should be incremented in update_avail_index
                self.avail_idx += Wrapping(1);
                self.update_avail_index()?;
            }
        }

        Ok(())
    }

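    /// Returns the IOVAs of the descriptor table, available ring, and used ring, which the caller
    /// passes to the device.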
    pub fn desc_table_addrs(&self) -> Result<DescTableAddrs> {
        Ok(DescTableAddrs {
            desc: self.mem_layout.desc_table.offset(),
            avail: self.mem_layout.avail_ring.offset(),
            used: self.mem_layout.used_ring.offset(),
        })
    }

    /// Returns the IOVA of the buffer for the given `index`.
    fn buffer_address(&self, index: Wrapping<u16>) -> Result<IOVA> {
        let offset = u64::from((index % self.size).0) * BUF_SIZE;
        self.mem_layout
            .buffer_addr
            .checked_add(offset)
            .ok_or(anyhow!("overflow txq"))
    }

    /// Writes the given descriptor table entry.
    fn write_desc_entry(&self, index: Wrapping<u16>, desc: Desc) -> Result<()> {
        let addr = self
            .mem_layout
            .desc_table
            .unchecked_add(u64::from((index % self.size).0) * mem::size_of::<Desc>() as u64);
        fence(Ordering::SeqCst);
        self.mem
            .write_obj_at_addr(desc, addr)
            .context("failed to write desc")
    }

    /// Puts an index into the avail ring for use by the host.
    fn update_avail_index(&self) -> Result<()> {
        fence(Ordering::SeqCst);
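        // avail.idx is the u16 at byte offset 2 in the available ring, right after the u16 flags
        // field.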
        self.mem
            .write_obj_at_addr(
                self.avail_idx.0,
                self.mem_layout.avail_ring.unchecked_add(2),
            )
            .context("failed to write avail.idx")?;
        Ok(())
    }

    /// Reads the Used ring's index.
    fn read_used_idx(&self) -> Result<Wrapping<u16>> {
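        // Like the available ring, the used ring stores its `idx` field at byte offset 2, after
        // the u16 flags field.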
        let used_index_addr = self.mem_layout.used_ring.unchecked_add(2);
        fence(Ordering::SeqCst);
        let used_index: u16 = self
            .mem
            .read_obj_from_addr(used_index_addr)
            .context("failed to read used idx")?;
        Ok(Wrapping(used_index))
    }

    /// Reads the Used ring's element for the given index.
    fn read_used_elem(&self, idx: Wrapping<u16>) -> Result<UsedElem> {
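        // Used ring entries (`UsedElem`: a u32 id and a u32 len) start at byte offset 4, after
        // the u16 flags and u16 idx fields.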
        let offset = 4 + (idx % self.size).0 as usize * mem::size_of::<UsedElem>();
        let addr = self
            .mem_layout
            .used_ring
            .checked_add(offset as u64)
            .context("overflow")?;
        fence(Ordering::SeqCst);
        self.mem
            .read_obj_from_addr(addr)
            .context("failed to read used")
    }

    /// Reads data in the virtqueue.
    /// Returns `Ok(None)` if no data are available.
    ///
    /// TODO: Use `descriptor_utils::Reader`.
    pub fn read_data(&mut self) -> Result<Option<VolatileSlice>> {
        if !self.device_writable {
            bail!("driver cannot read device-readable descriptors");
        }

        let idx = self.read_used_idx()?;
        let cur = self.used_count;
        if cur == idx {
            return Ok(None);
        }

        let elem = self.read_used_elem(cur)?;

        let id = Wrapping(u32::from(elem.id) as u16);
        let len = u32::from(elem.len) as usize;

        let addr = self.buffer_address(id)?;

        fence(Ordering::SeqCst);
        let s = self
            .mem
            .get_slice_at_addr(addr, len)
            .context("failed to read data")?;

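        // Hand the consumed buffer back to the device by advancing the available index again.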
        self.used_count += Wrapping(1);
        self.avail_idx += Wrapping(1);
        self.update_avail_index()?;
        Ok(Some(s))
    }

    /// Writes data into the virtqueue's buffer and returns its address.
    ///
    /// TODO: Use `descriptor_utils::Writer`.
    fn write_to_buffer(&self, index: Wrapping<u16>, data: &[u8]) -> Result<IOVA> {
        if data.len() as u64 > BUF_SIZE {
            bail!(
                "data size {} is larger than the buffer size {}",
                data.len(),
                BUF_SIZE
            );
        }

        let addr = self.buffer_address(index)?;
        fence(Ordering::SeqCst);
        let written = self
            .mem
            .write_at_addr(data, addr)
            .context("failed to write data")?;
        if written < data.len() {
            bail!(
                "not enough memory: wrote {} bytes, but the data length is {}",
                written,
                data.len()
            );
        }
        Ok(addr)
    }

    /// Acknowledges buffers that the device used.
    pub fn ack_used(&mut self) -> Result<()> {
        let used_idx = self.read_used_idx()?;
        let num_used = used_idx - self.used_count;

        self.used_count += num_used;
        self.free_count += num_used;

        Ok(())
    }

    /// Writes the given data to the virtqueue.
    pub fn write(&mut self, data: &[u8]) -> Result<()> {
        if self.device_writable {
            bail!("driver cannot write to device-writable descriptors");
        }

        self.ack_used()?;

        if self.free_count == Wrapping(0) {
            // TODO: wait until the device processes buffers.
            bail!("no avail descriptor is left");
        }

        let addr = self
            .write_to_buffer(self.avail_idx, data)
            .context("failed to write data to virtqueue")?;

        let desc = Desc {
            addr: Le64::from(addr.offset()),
            len: Le32::from(data.len() as u32),
            flags: Le16::from(0),
            next: Le16::from(0),
        };
        self.write_desc_entry(self.avail_idx, desc)?;
        self.free_count -= Wrapping(1);

        self.avail_idx += Wrapping(1);
        self.update_avail_index()?;

        Ok(())
    }
}

#[cfg(test)]
mod test {
    use super::*;

    use std::cell::RefCell;
    use std::io::Read;
    use std::io::Write;

    use crate::virtio::{Queue as DeviceQueue, Reader, Writer};

    // An allocator that just allocates 0 as an IOVA.
    struct SimpleIovaAllocator(RefCell<bool>);

    impl IovaAllocator for SimpleIovaAllocator {
        fn alloc_iova(&self, _size: u64, _tag: u8) -> Result<u64> {
            if *self.0.borrow() {
                bail!("exhausted");
            }
            *self.0.borrow_mut() = true;
            Ok(0)
        }

        unsafe fn map_iova(&self, _iova: u64, _size: u64, _addr: *const u8) -> Result<()> {
            if !*self.0.borrow() {
                bail!("not allocated");
            }
            Ok(())
        }
    }

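    // Points the device-side queue at the rings allocated by the driver-side UserQueue.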
    fn setup_vq(queue: &mut DeviceQueue, addrs: DescTableAddrs) {
        queue.desc_table = IOVA(addrs.desc);
        queue.avail_ring = IOVA(addrs.avail);
        queue.used_ring = IOVA(addrs.used);
        queue.ready = true;
    }

    fn device_write(mem: &QueueMemory, q: &mut DeviceQueue, data: &[u8]) -> usize {
        let desc_chain = q.pop(mem).unwrap();
        let index = desc_chain.index;

        let mut writer = Writer::new(mem.clone(), desc_chain).unwrap();
        let written = writer.write(data).unwrap();
        q.add_used(mem, index, written as u32);
        written
    }

    fn device_read(mem: &QueueMemory, q: &mut DeviceQueue, len: usize) -> Vec<u8> {
        let desc_chain = q.pop(mem).unwrap();
        let desc_index = desc_chain.index;
        let mut reader = Reader::new(mem.clone(), desc_chain).unwrap();
        let mut buf = vec![0; len];
        reader.read_exact(&mut buf).unwrap();
        q.add_used(mem, desc_index, len as u32);
        buf
    }

    fn driver_read(q: &mut UserQueue) -> Vec<u8> {
        let data = q.read_data().unwrap().unwrap();
        let mut buf = vec![0; data.size()];
        data.copy_to(&mut buf);

        buf
    }

    fn driver_write(q: &mut UserQueue, data: &[u8]) {
        q.write(data).unwrap()
    }

    // Send an array from the driver to the device `count` times.
    fn drv_to_dev(queue_size: u16, count: u32) {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, false /* device_writable */, 0, &iova_alloc).unwrap();
        let mut dev_queue = DeviceQueue::new(queue_size);
        setup_vq(&mut dev_queue, drv_queue.desc_table_addrs().unwrap());

        for i in 0..count {
            let input = vec![(i + 1) as u8; 5];
            driver_write(&mut drv_queue, &input);

            let buf = device_read(&drv_queue.mem, &mut dev_queue, input.len());
            assert_eq!(input, buf);
            assert!(dev_queue.peek(&drv_queue.mem).is_none());
        }
    }

    #[test]
    fn test_driver_write() {
        let queue_size = 256;
        let iteration = 20;
        drv_to_dev(queue_size, iteration);
    }

    #[test]
    fn test_driver_write_small_queue() {
        // Test with a small queue.
        let queue_size = 8;
        let iteration = 20;
        drv_to_dev(queue_size, iteration);
    }

    // This test loops (65536 + 20) times. To avoid running it on slow emulated CI environments,
    // it is limited to x86/x86_64 targets.
    // TODO(keiichiw): Change the test to mutate queues' internal state to avoid the actual loop.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_driver_write_wrapping() {
        // Test that the index wraps around when the iteration count exceeds 16 bits.
        let queue_size = 256;

        let iteration = u32::from(u16::MAX) + 20;
        drv_to_dev(queue_size, iteration);
    }

    // Send an array from the device to the driver `count` times.
    fn dev_to_drv(queue_size: u16, count: u32) {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, true /* device_writable */, 0, &iova_alloc).unwrap();
        let mut dev_queue = DeviceQueue::new(queue_size);
        setup_vq(&mut dev_queue, drv_queue.desc_table_addrs().unwrap());

        for i in 0..count {
            let input = [i as u8; 5];

            // Device writes data to driver
            let written = device_write(&drv_queue.mem, &mut dev_queue, &input);
            assert_eq!(written, input.len());

            // Driver reads data
            let buf = driver_read(&mut drv_queue);
            assert_eq!(buf, input);
        }
    }

    #[test]
    fn test_driver_read() {
        let queue_size = 256;
        let iteration = 20;
        dev_to_drv(queue_size, iteration);
    }

    #[test]
    fn test_driver_read_small_queue() {
        // Test with a small queue.
        let queue_size = 8;
        let iteration = 20;
        dev_to_drv(queue_size, iteration);
    }

    // This test loops (65536 + 20) times. To avoid running it on slow emulated CI environments,
    // it is limited to x86/x86_64 targets.
    // TODO(keiichiw): Change the test to mutate queues' internal state to avoid the actual loop.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_driver_read_wrapping() {
        // Test that the index wraps around when the iteration count exceeds 16 bits.
        let queue_size = 256;
        let iteration = u32::from(u16::MAX) + 20;
        dev_to_drv(queue_size, iteration);
    }
}