// Copyright 2021 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Implements the driver side of virtio queue handling.
//! The virtqueue struct is expected to be used in userspace VFIO virtio drivers.

use std::mem;
use std::num::Wrapping;
use std::sync::atomic::fence;
use std::sync::atomic::Ordering;

use anyhow::anyhow;
use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use data_model::Le16;
use data_model::Le32;
use data_model::Le64;
use data_model::VolatileSlice;
use virtio_sys::virtio_ring::VRING_DESC_F_WRITE;
use vm_memory::GuestAddress as IOVA;
use vm_memory::GuestMemory as QueueMemory;
use zerocopy::FromBytes;

use crate::virtio::Desc;

#[derive(Copy, Clone, Debug, FromBytes)]
#[repr(C)]
struct UsedElem {
    id: Le32,
    len: Le32,
}

/// Size in bytes of each pre-allocated buffer.
const BUF_SIZE: u64 = 1024;

pub struct DescTableAddrs {
    pub desc: u64,
    pub avail: u64,
    pub used: u64,
}

struct MemLayout {
    /// Address of the descriptor table in UserQueue.mem.
    desc_table: IOVA,

    /// Address of the available ring in UserQueue.mem.
    avail_ring: IOVA,

    /// Address of the used ring in UserQueue.mem.
    used_ring: IOVA,

    /// Address of the start of buffers in UserQueue.mem.
    buffer_addr: IOVA,
}

/// Represents a virtqueue that is allocated in the guest userspace and manipulated from a VFIO
/// driver.
///
/// This struct is similar to `devices::virtio::Queue`, which is designed for virtio devices, but
/// this struct is defined for virtio drivers.
///
/// # Memory Layout
///
/// `mem` is the memory allocated in the guest userspace for the virtqueue, which is mapped into
/// the vvu device via VFIO. The GuestAddresses of `mem` are the IOVAs that should be used when
/// communicating with the vvu device. All accesses to the shared memory from the device backend
/// must be done through the GuestMemory read/write functions.
///
/// The layout of `mem` is defined in the following table and stored in `mem_layout`.
///
/// |                  | Alignment     | Size                         |
/// |------------------|---------------|------------------------------|
/// | Descriptor Table | 16            | 16 * (Queue Size)            |
/// | Available Ring   | 2             | 6 + 2 * (Queue Size)         |
/// | Used Ring        | 4             | 6 + 8 * (Queue Size)         |
/// | Buffers          | (Buffer Size) | (Buffer Size) * (Queue Size) |
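///
/// For example, with a queue size of 32 and `BUF_SIZE` = 1024, `init_memory` lays the regions
/// out as follows (offsets in bytes, before the allocated IOVA base is added):
///
/// * Descriptor Table: offset 0,    size 16 * 32    = 512
/// * Available Ring:   offset 512,  size 6 + 2 * 32 = 70
/// * Used Ring:        offset 584,  size 6 + 8 * 32 = 262   (512 + 70 = 582, aligned up to 4)
/// * Buffers:          offset 1024, size 1024 * 32  = 32768 (584 + 262 = 846, aligned up to 1024)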
///
/// TODO(b/207364742): Once we support `VIRTIO_F_EVENT_IDX`, the additional 2 bytes for the
/// `used_event` field will be added.
///
/// TODO(b/215153367): Use `crate::virtio::Queue` as an underlying data structure so that we can
/// use `descriptor_utils::{Reader, Writer}` instead of having our own read/write methods.
/// One of the biggest blockers is that `virtio::Queue` is designed for the device-side virtqueue,
/// where readable/writable areas are inverted from our use case.
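///
/// # Example
///
/// A minimal usage sketch; `allocator` stands in for any `IovaAllocator` implementation and is
/// not defined here:
///
/// ```ignore
/// let mut queue = UserQueue::new(256, false /* device_writable */, 0, &allocator)?;
/// queue.write(b"hello")?; // Make a buffer containing "hello" available to the device.
/// queue.ack_used()?;      // Later, reclaim the buffers the device has marked as used.
/// ```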
pub struct UserQueue {
    /// The queue size.
    size: Wrapping<u16>,

    /// The underlying memory.
    mem: QueueMemory,

    /// Virtqueue layout on `mem`.
    mem_layout: MemLayout,

    /// The next index to be written to the available ring.
    avail_idx: Wrapping<u16>,

    /// The number of used-ring entries the driver has processed.
    used_count: Wrapping<u16>,
    /// The number of descriptors not currently exposed to the device.
    free_count: Wrapping<u16>,

    /// Whether buffers are device-writable or device-readable.
    /// If true, every descriptor has the VIRTQ_DESC_F_WRITE flag.
    /// TODO(b/215153358, b/215153367): Since VIRTQ_DESC_F_WRITE is a per-descriptor flag, this
    /// design is specific to the current vvu specification draft, where a device-writable queue
    /// and a device-readable queue are separated.
    /// Ideally, we should update the vvu spec to use both device-{readable, writable} buffers in
    /// one virtqueue. Also, it's better to use `crate::virtio::DescriptorChain` for descriptors as
    /// a part of b/215153367.
    device_writable: bool,
}

/// Interface used by UserQueue to interact with the IOMMU.
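///
/// # Example
///
/// A minimal sketch of an implementation: a bump allocator whose `map_iova` is a no-op. The
/// `BumpAllocator` type is illustrative only; a real implementation would program an IOMMU in
/// `map_iova`.
///
/// ```ignore
/// use anyhow::{Context, Result};
///
/// struct BumpAllocator {
///     next: std::cell::Cell<u64>,
/// }
///
/// impl IovaAllocator for BumpAllocator {
///     fn alloc_iova(&self, size: u64, _tag: u8) -> Result<u64> {
///         let iova = self.next.get();
///         let end = iova.checked_add(size).context("iova space exhausted")?;
///         self.next.set(end);
///         Ok(iova)
///     }
///
///     unsafe fn map_iova(&self, _iova: u64, _size: u64, _addr: *const u8) -> Result<()> {
///         // A real implementation would establish the IOVA -> host address mapping here.
///         Ok(())
///     }
/// }
/// ```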
pub trait IovaAllocator {
    /// Allocates an IO virtual address region of the requested size.
    fn alloc_iova(&self, size: u64, tag: u8) -> Result<u64>;

    /// Maps the given address at the given IOVA.
    ///
    /// # Safety
    ///
    /// `addr` must reference a region of at least length `size`. Memory passed
    /// to this function may be mutated at any time, so `addr` must not be memory
    /// that is directly managed by Rust.
    unsafe fn map_iova(&self, iova: u64, size: u64, addr: *const u8) -> Result<()>;
}

impl UserQueue {
    /// Creates a `UserQueue` instance.
    pub fn new<I>(queue_size: u16, device_writable: bool, tag: u8, iova_alloc: &I) -> Result<Self>
    where
        I: IovaAllocator,
    {
        let (mem, size, mem_layout) = Self::init_memory(queue_size, tag, iova_alloc)?;

        let mut queue = Self {
            mem,
            size: Wrapping(size),
            mem_layout,
            avail_idx: Wrapping(0),
            used_count: Wrapping(0),
            free_count: Wrapping(size),
            device_writable,
        };

        queue.init_descriptor_table()?;

        Ok(queue)
    }

    /// Allocates a memory region for the virtqueue and returns it along with the queue size and
    /// the addresses of `desc_table`, `avail_ring`, `used_ring`, and `buffer_addr` within it.
    fn init_memory<I>(
        max_queue_size: u16,
        tag: u8,
        iova_alloc: &I,
    ) -> Result<(QueueMemory, u16, MemLayout)>
    where
        I: IovaAllocator,
    {
        // Since vhost-user negotiation finishes within ~20 messages, even a small queue
        // (e.g. 32) is enough; cap the size at 256.
        const MAX_QUEUE_SIZE: u16 = 256;

        let queue_size = std::cmp::min(MAX_QUEUE_SIZE, max_queue_size);
        if queue_size == 0 || !queue_size.is_power_of_two() {
            bail!(
                "queue_size must be a positive power of 2, but got {}",
                queue_size
            );
        }

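        // Rounds `n` up to the nearest multiple of `m`.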
        fn align(n: u64, m: u64) -> u64 {
            ((n + m - 1) / m) * m
        }

        let desc_table = IOVA(0);
        let desc_size = 16u64 * u64::from(queue_size);
        let desc_end = desc_table.0 + desc_size;

        let avail_ring = IOVA(align(desc_end, 2));
        let avail_size = 6 + 2 * u64::from(queue_size);
        let avail_end = avail_ring.0 + avail_size;

        let used_ring = IOVA(align(avail_end, 4));
        let used_size = 6 + 8 * u64::from(queue_size);
        let used_end = used_ring.0 + used_size;

        let buffer_addr = IOVA(align(used_end, BUF_SIZE));
        let buffer_size = BUF_SIZE * u64::from(queue_size);

        let mem_size = align(buffer_addr.0 + buffer_size, base::pagesize() as u64);
        let iova_start = iova_alloc
            .alloc_iova(mem_size, tag)
            .context("failed to allocate queue iova")?;

        let mem = QueueMemory::new(&[(IOVA(iova_start), mem_size)])
            .map_err(|e| anyhow!("failed to create QueueMemory for virtqueue: {}", e))?;

        let host_addr = mem
            .get_host_address_range(IOVA(iova_start), mem_size as usize)
            .context("failed to get host address")?;
        // Safe because the region being mapped is managed via the GuestMemory interface.
        unsafe {
            iova_alloc
                .map_iova(iova_start, mem_size, host_addr)
                .context("failed to map queue")?;
        }

        let mem_layout = MemLayout {
            desc_table: desc_table.unchecked_add(iova_start),
            avail_ring: avail_ring.unchecked_add(iova_start),
            used_ring: used_ring.unchecked_add(iova_start),
            buffer_addr: buffer_addr.unchecked_add(iova_start),
        };

        Ok((mem, queue_size, mem_layout))
    }

    /// Initializes the descriptor table.
    fn init_descriptor_table(&mut self) -> Result<()> {
        let flags = if self.device_writable {
            Le16::from(VRING_DESC_F_WRITE as u16)
        } else {
            Le16::from(0)
        };
        let len = Le32::from(BUF_SIZE as u32);
        let next = Le16::from(0);

        // Register pre-allocated buffers to the descriptor area.
        for i in 0..self.size.0 {
            let idx = Wrapping(i);
            let iova = self.buffer_address(idx)?.offset();
            let desc = Desc {
                addr: iova.into(),
                len,
                flags,
                next,
            };
            self.write_desc_entry(idx, desc)
                .map_err(|e| anyhow!("failed to write {}-th desc: {}", idx, e))?;

            fence(Ordering::SeqCst);
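            // avail.ring[i] lives at byte offset 4 (after the 2-byte `flags` and 2-byte
            // `idx` fields), with 2 bytes per entry.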
            self.mem
                .write_obj_at_addr(
                    idx.0,
                    self.mem_layout
                        .avail_ring
                        .unchecked_add(u64::from(4 + 2 * i)),
                )
                .context("failed to write avail ring")?;
        }

        // If all of `self`'s buffers are device-writable, expose them to the device.
        if self.device_writable {
            for _ in 0..self.size.0 {
                // TODO(keiichiw): avail_idx should be incremented in update_avail_index.
                self.avail_idx += Wrapping(1);
                self.update_avail_index()?;
            }
        }

        Ok(())
    }

    pub fn desc_table_addrs(&self) -> Result<DescTableAddrs> {
        Ok(DescTableAddrs {
            desc: self.mem_layout.desc_table.offset(),
            avail: self.mem_layout.avail_ring.offset(),
            used: self.mem_layout.used_ring.offset(),
        })
    }

    /// Returns the IOVA of the buffer for the given `index`.
    fn buffer_address(&self, index: Wrapping<u16>) -> Result<IOVA> {
        let offset = u64::from((index % self.size).0) * BUF_SIZE;
        self.mem_layout
            .buffer_addr
            .checked_add(offset)
            .ok_or(anyhow!("buffer address overflow"))
    }

    /// Writes the given descriptor table entry.
    fn write_desc_entry(&self, index: Wrapping<u16>, desc: Desc) -> Result<()> {
        let addr = self
            .mem_layout
            .desc_table
            .unchecked_add(u64::from((index % self.size).0) * mem::size_of::<Desc>() as u64);
        fence(Ordering::SeqCst);
        self.mem
            .write_obj_at_addr(desc, addr)
            .context("failed to write desc")
    }

    /// Puts an index into the avail ring for use by the host.
    fn update_avail_index(&self) -> Result<()> {
        fence(Ordering::SeqCst);
        // avail.idx lives at byte offset 2, right after the 2-byte `flags` field.
        self.mem
            .write_obj_at_addr(
                self.avail_idx.0,
                self.mem_layout.avail_ring.unchecked_add(2),
            )
            .context("failed to write avail.idx")?;
        Ok(())
    }

    /// Reads the used ring's index.
    fn read_used_idx(&self) -> Result<Wrapping<u16>> {
        // used.idx lives at byte offset 2, right after the 2-byte `flags` field.
        let used_index_addr = self.mem_layout.used_ring.unchecked_add(2);
        fence(Ordering::SeqCst);
        let used_index: u16 = self
            .mem
            .read_obj_from_addr(used_index_addr)
            .context("failed to read used.idx")?;
        Ok(Wrapping(used_index))
    }

    /// Reads the used ring's element for the given index.
    fn read_used_elem(&self, idx: Wrapping<u16>) -> Result<UsedElem> {
        // used.ring[] starts at byte offset 4, after the `flags` and `idx` fields.
        let offset = 4 + (idx % self.size).0 as usize * mem::size_of::<UsedElem>();
        let addr = self
            .mem_layout
            .used_ring
            .checked_add(offset as u64)
            .context("used ring address overflow")?;
        fence(Ordering::SeqCst);
        self.mem
            .read_obj_from_addr(addr)
            .context("failed to read used element")
    }

    /// Reads data in the virtqueue.
    /// Returns `Ok(None)` if no data is available.
    ///
    /// TODO: Use `descriptor_utils::Reader`.
    pub fn read_data(&mut self) -> Result<Option<VolatileSlice>> {
        if !self.device_writable {
            bail!("driver cannot read device-readable descriptors");
        }

        let idx = self.read_used_idx()?;
        let cur = self.used_count;
        if cur == idx {
            return Ok(None);
        }

        let elem = self.read_used_elem(cur)?;

        let id = Wrapping(u32::from(elem.id) as u16);
        let len = u32::from(elem.len) as usize;

        let addr = self.buffer_address(id)?;

        fence(Ordering::SeqCst);
        let s = self
            .mem
            .get_slice_at_addr(addr, len)
            .context("failed to read data")?;

        self.used_count += Wrapping(1);
        self.avail_idx += Wrapping(1);
        self.update_avail_index()?;
        Ok(Some(s))
    }

    /// Writes data into the virtqueue's buffer and returns its address.
    ///
    /// TODO: Use `descriptor_utils::Writer`.
    fn write_to_buffer(&self, index: Wrapping<u16>, data: &[u8]) -> Result<IOVA> {
        if data.len() as u64 > BUF_SIZE {
            bail!(
                "data size {} is larger than the buffer size {}",
                data.len(),
                BUF_SIZE
            );
        }

        let addr = self.buffer_address(index)?;
        fence(Ordering::SeqCst);
        let written = self
            .mem
            .write_at_addr(data, addr)
            .context("failed to write data")?;
        if written < data.len() {
            bail!(
                "not enough memory: wrote {} bytes, but data length is {}",
                written,
                data.len()
            );
        }
        Ok(addr)
    }

    /// Acknowledges buffers that the device used.
    pub fn ack_used(&mut self) -> Result<()> {
        let used_idx = self.read_used_idx()?;
        let num_used = used_idx - self.used_count;

        self.used_count += num_used;
        self.free_count += num_used;

        Ok(())
    }

    /// Writes the given data to the virtqueue.
    pub fn write(&mut self, data: &[u8]) -> Result<()> {
        if self.device_writable {
            bail!("driver cannot write to device-writable descriptors");
        }

        self.ack_used()?;

        if self.free_count == Wrapping(0) {
            // TODO: Wait until the device processes buffers.
            bail!("no available descriptor is left");
        }

        let addr = self
            .write_to_buffer(self.avail_idx, data)
            .context("failed to write data to virtqueue")?;

        let desc = Desc {
            addr: Le64::from(addr.offset()),
            len: Le32::from(data.len() as u32),
            flags: Le16::from(0),
            next: Le16::from(0),
        };
        self.write_desc_entry(self.avail_idx, desc)?;
        self.free_count -= Wrapping(1);

        self.avail_idx += Wrapping(1);
        self.update_avail_index()?;

        Ok(())
    }
}

#[cfg(test)]
mod test {
    use std::cell::RefCell;
    use std::io::Read;
    use std::io::Write;

    use super::*;
    use crate::virtio::Queue as DeviceQueue;
    use crate::virtio::Reader;
    use crate::virtio::Writer;

    // An allocator that just allocates 0 as an IOVA.
    struct SimpleIovaAllocator(RefCell<bool>);

    impl IovaAllocator for SimpleIovaAllocator {
        fn alloc_iova(&self, _size: u64, _tag: u8) -> Result<u64> {
            if *self.0.borrow() {
                bail!("exhausted");
            }
            *self.0.borrow_mut() = true;
            Ok(0)
        }

        unsafe fn map_iova(&self, _iova: u64, _size: u64, _addr: *const u8) -> Result<()> {
            if !*self.0.borrow() {
                bail!("not allocated");
            }
            Ok(())
        }
    }

    fn setup_vq(queue: &mut DeviceQueue, addrs: DescTableAddrs) {
        queue.set_desc_table(IOVA(addrs.desc));
        queue.set_avail_ring(IOVA(addrs.avail));
        queue.set_used_ring(IOVA(addrs.used));
        queue.set_ready(true);
    }

    fn device_write(mem: &QueueMemory, q: &mut DeviceQueue, data: &[u8]) -> usize {
        let desc_chain = q.pop(mem).unwrap();
        let index = desc_chain.index;

        let mut writer = Writer::new(mem.clone(), desc_chain).unwrap();
        let written = writer.write(data).unwrap();
        q.add_used(mem, index, written as u32);
        written
    }

    fn device_read(mem: &QueueMemory, q: &mut DeviceQueue, len: usize) -> Vec<u8> {
        let desc_chain = q.pop(mem).unwrap();
        let desc_index = desc_chain.index;
        let mut reader = Reader::new(mem.clone(), desc_chain).unwrap();
        let mut buf = vec![0; len];
        reader.read_exact(&mut buf).unwrap();
        q.add_used(mem, desc_index, len as u32);
        buf
    }

    fn driver_read(q: &mut UserQueue) -> Vec<u8> {
        let data = q.read_data().unwrap().unwrap();
        let mut buf = vec![0; data.size()];
        data.copy_to(&mut buf);

        buf
    }

    fn driver_write(q: &mut UserQueue, data: &[u8]) {
        q.write(data).unwrap()
    }

    // Send an array from the driver to the device `count` times.
    fn drv_to_dev(queue_size: u16, count: u32) {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, false /* device_writable */, 0, &iova_alloc).unwrap();
        let mut dev_queue = DeviceQueue::new(queue_size);
        setup_vq(&mut dev_queue, drv_queue.desc_table_addrs().unwrap());

        for i in 0..count {
            let input = vec![(i + 1) as u8; 5];
            driver_write(&mut drv_queue, &input);

            let buf = device_read(&drv_queue.mem, &mut dev_queue, input.len());
            assert_eq!(input, buf);
            assert!(dev_queue.peek(&drv_queue.mem).is_none());
        }
    }

    #[test]
    fn test_driver_write() {
        let queue_size = 256;
        let iteration = 20;
        drv_to_dev(queue_size, iteration);
    }

    #[test]
    fn test_driver_write_small_queue() {
        // Test with a small queue.
        let queue_size = 8;
        let iteration = 20;
        drv_to_dev(queue_size, iteration);
    }

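    // A sketch of an exhaustion test (not part of the original suite): fill every descriptor
    // without letting the device consume any, then check that the next write fails rather than
    // blocking, matching the TODO in `UserQueue::write`.
    #[test]
    fn test_driver_write_exhausts_queue() {
        let queue_size = 8;
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, false /* device_writable */, 0, &iova_alloc).unwrap();

        // Use up all `queue_size` descriptors.
        for i in 0..queue_size {
            drv_queue.write(&[i as u8]).unwrap();
        }

        // No descriptor is free and the device has used none, so this write must fail.
        assert!(drv_queue.write(&[0xff]).is_err());
    }
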
    // This test loops (65536 + 20) times. To avoid running it on slow emulated CI environments,
    // specify the target architecture.
    // TODO(keiichiw): Change the test to mutate queues' internal state to avoid the actual loop.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_driver_write_wrapping() {
        // Test that the index can wrap around when the iteration count exceeds 16 bits.
        let queue_size = 256;

        let iteration = u32::from(u16::MAX) + 20;
        drv_to_dev(queue_size, iteration);
    }

    // Send an array from the device to the driver `count` times.
    fn dev_to_drv(queue_size: u16, count: u32) {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, true /* device_writable */, 0, &iova_alloc).unwrap();
        let mut dev_queue = DeviceQueue::new(queue_size);
        setup_vq(&mut dev_queue, drv_queue.desc_table_addrs().unwrap());

        for i in 0..count {
            let input = [i as u8; 5];

            // Device writes data to the driver.
            let written = device_write(&drv_queue.mem, &mut dev_queue, &input);
            assert_eq!(written, input.len());

            // Driver reads the data.
            let buf = driver_read(&mut drv_queue);
            assert_eq!(buf, input);
        }
    }

    #[test]
    fn test_driver_read() {
        let queue_size = 256;
        let iteration = 20;
        dev_to_drv(queue_size, iteration);
    }

    #[test]
    fn test_driver_read_small_queue() {
        // Test with a small queue.
        let queue_size = 8;
        let iteration = 20;
        dev_to_drv(queue_size, iteration);
    }

    // This test loops (65536 + 20) times. To avoid running it on slow emulated CI environments,
    // specify the target architecture.
    // TODO(keiichiw): Change the test to mutate queues' internal state to avoid the actual loop.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_driver_read_wrapping() {
        // Test that the index can wrap around when the iteration count exceeds 16 bits.
        let queue_size = 256;
        let iteration = u32::from(u16::MAX) + 20;
        dev_to_drv(queue_size, iteration);
    }
}