// Copyright 2021 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Implement the driver side of virtio queue handling.
//! The virtqueue struct is expected to be used in userspace VFIO virtio drivers.

use std::mem;
use std::num::Wrapping;
use std::sync::atomic::{fence, Ordering};

use anyhow::{anyhow, bail, Context, Result};
use data_model::{DataInit, Le16, Le32, Le64, VolatileSlice};
use virtio_sys::virtio_ring::VRING_DESC_F_WRITE;
use vm_memory::{GuestAddress as IOVA, GuestMemory as QueueMemory};

use crate::virtio::Desc;

#[derive(Copy, Clone, Debug)]
#[repr(C)]
struct UsedElem {
    id: Le32,
    len: Le32,
}
// Safe because there are no implicit offsets.
unsafe impl DataInit for UsedElem {}

const BUF_SIZE: u64 = 1024;

pub struct DescTableAddrs {
    pub desc: u64,
    pub avail: u64,
    pub used: u64,
}

struct MemLayout {
    /// Address of the descriptor table in UserQueue.mem.
    desc_table: IOVA,

    /// Address of the available ring in UserQueue.mem.
    avail_ring: IOVA,

    /// Address of the used ring in UserQueue.mem.
    used_ring: IOVA,

    /// Address of the start of buffers in UserQueue.mem.
    buffer_addr: IOVA,
}

/// Represents a virtqueue that is allocated in the guest userspace and manipulated from a VFIO
/// driver.
///
/// This struct is similar to `devices::virtio::Queue`, which is designed for virtio devices,
/// whereas this struct is defined for virtio drivers.
///
/// # Memory Layout
///
/// `mem` is the memory allocated in the guest userspace for the virtqueue, which is mapped into
/// the vvu device via VFIO. The GuestAddresses of `mem` are the IOVAs that should be used when
/// communicating with the vvu device. All accesses to the shared memory from the device backend
/// must be done through the GuestMemory read/write functions.
///
/// The layout of `mem` is defined in the following table and stored in `mem_layout`.
///
/// |                  | Alignment     | Size                         |
/// |-----------------------------------------------------------------|
/// | Descriptor Table | 16            | 16 ∗ (Queue Size)            |
/// | Available Ring   | 2             | 6 + 2 ∗ (Queue Size)         |
/// | Used Ring        | 4             | 6 + 8 ∗ (Queue Size)         |
/// | Buffers          | (Buffer Size) | (Buffer Size) ∗ (Queue Size) |
/// -------------------------------------------------------------------
///
/// TODO(b/207364742): Once we support `VIRTIO_F_EVENT_IDX`, the additional 2 bytes for the
/// `used_event` field will be added.
/// TODO(b/215153367): Use `crate::virtio::Queue` as an underlying data structure so that we can
/// use `descriptor_utils::{Reader, Writer}` instead of having our own read/write methods.
/// One of the biggest blockers is that `virtio::Queue` is designed for the device-side virtqueue,
/// where readable/writable areas are inverted from our use case.
pub struct UserQueue {
    /// The queue size.
    size: Wrapping<u16>,

    /// The underlying memory.
    mem: QueueMemory,

    /// Virtqueue layout on `mem`.
    mem_layout: MemLayout,

    avail_idx: Wrapping<u16>,

    used_count: Wrapping<u16>,
    free_count: Wrapping<u16>,

    /// Whether buffers are device-writable or device-readable.
    /// If true, every descriptor has the VIRTQ_DESC_F_WRITE flag.
    /// TODO(b/215153358, b/215153367): Since VIRTQ_DESC_F_WRITE is a per-descriptor flag, this
    /// design is specific to the current vvu specification draft, where a device-writable queue
    /// and a device-readable queue are separated.
    /// Ideally, we should update the vvu spec to use both device-{readable, writable} buffers in
    /// one virtqueue. Also, it's better to use `crate::virtio::DescriptorChain` for descriptors
    /// as a part of b/215153367.
    device_writable: bool,
}

/// Interface used by UserQueue to interact with the IOMMU.
pub trait IovaAllocator {
    /// Allocates an IO virtual address region of the requested size.
    fn alloc_iova(&self, size: u64, tag: u8) -> Result<u64>;

    /// Maps the given address at the given IOVA.
    ///
    /// # Safety
    ///
    /// `addr` must reference a region of at least length `size`. Memory passed
    /// to this function may be mutated at any time, so `addr` must not be memory
    /// that is directly managed by Rust.
    unsafe fn map_iova(&self, iova: u64, size: u64, addr: *const u8) -> Result<()>;
}

impl UserQueue {
    /// Creates a `UserQueue` instance.
    pub fn new<I>(queue_size: u16, device_writable: bool, tag: u8, iova_alloc: &I) -> Result<Self>
    where
        I: IovaAllocator,
    {
        let (mem, size, mem_layout) = Self::init_memory(queue_size, tag, iova_alloc)?;

        let mut queue = Self {
            mem,
            size: Wrapping(size),
            mem_layout,
            avail_idx: Wrapping(0),
            used_count: Wrapping(0),
            free_count: Wrapping(size),
            device_writable,
        };

        queue.init_descriptor_table()?;

        Ok(queue)
    }

    /// Allocates a memory region and returns the addresses in the region for
    /// (`desc_table`, `avail_ring`, `used_ring`, `buffer`).
    fn init_memory<I>(
        max_queue_size: u16,
        tag: u8,
        iova_alloc: &I,
    ) -> Result<(QueueMemory, u16, MemLayout)>
    where
        I: IovaAllocator,
    {
        // Since vhost-user negotiation finishes within ~20 messages, queue size 32 is enough.
        const MAX_QUEUE_SIZE: u16 = 256;

        let queue_size = std::cmp::min(MAX_QUEUE_SIZE, max_queue_size);
        if queue_size == 0 || !queue_size.is_power_of_two() {
            bail!(
                "queue_size must be a positive power of 2, but got {}",
                queue_size
            );
        }

        fn align(n: u64, m: u64) -> u64 {
            ((n + m - 1) / m) * m
        }

        let desc_table = IOVA(0);
        let desc_size = 16u64 * u64::from(queue_size);
        let desc_end = desc_table.0 + desc_size;

        let avail_ring = IOVA(align(desc_end, 2));
        let avail_size = 6 + 2 * u64::from(queue_size);
        let avail_end = avail_ring.0 + avail_size;

        let used_ring = IOVA(align(avail_end, 4));
        let used_size = 6 + 8 * u64::from(queue_size);
        let used_end = used_ring.0 + used_size;

        let buffer_addr = IOVA(align(used_end, BUF_SIZE));
        let buffer_size = BUF_SIZE * u64::from(queue_size);

        let mem_size = align(buffer_addr.0 + buffer_size, base::pagesize() as u64);
        let iova_start = iova_alloc
            .alloc_iova(mem_size, tag)
            .context("failed to allocate queue iova")?;

        let mem = QueueMemory::new(&[(IOVA(iova_start), mem_size)])
            .map_err(|e| anyhow!("failed to create QueueMemory for virtqueue: {}", e))?;

        let host_addr = mem
            .get_host_address_range(IOVA(iova_start), mem_size as usize)
            .context("failed to get host address")?;
        // Safe because the region being mapped is managed via the GuestMemory interface.
        unsafe {
            iova_alloc
                .map_iova(iova_start, mem_size, host_addr)
                .context("failed to map queue")?;
        }

        let mem_layout = MemLayout {
            desc_table: desc_table.unchecked_add(iova_start),
            avail_ring: avail_ring.unchecked_add(iova_start),
            used_ring: used_ring.unchecked_add(iova_start),
            buffer_addr: buffer_addr.unchecked_add(iova_start),
        };

        Ok((mem, queue_size, mem_layout))
    }

    /// Initializes the descriptor table.
    fn init_descriptor_table(&mut self) -> Result<()> {
        let flags = if self.device_writable {
            Le16::from(VRING_DESC_F_WRITE as u16)
        } else {
            Le16::from(0)
        };
        let len = Le32::from(BUF_SIZE as u32);
        let next = Le16::from(0);

        // Register pre-allocated buffers to the descriptor area.
        for i in 0..self.size.0 {
            let idx = Wrapping(i);
            let iova = self.buffer_address(idx)?.offset();
            let desc = Desc {
                addr: iova.into(),
                len,
                flags,
                next,
            };
            self.write_desc_entry(idx, desc)
                .map_err(|e| anyhow!("failed to write {}-th desc: {}", idx, e))?;

            fence(Ordering::SeqCst);
            self.mem
                .write_obj_at_addr(
                    idx.0,
                    self.mem_layout
                        .avail_ring
                        .unchecked_add(u64::from(4 + 2 * i)),
                )
                .context("failed to write avail ring")?;
        }

        // If all of `self`'s buffers are device-writable, expose them to the device.
        if self.device_writable {
            for _ in 0..self.size.0 {
                // TODO(keiichiw): avail_idx should be incremented in update_avail_index.
                self.avail_idx += Wrapping(1);
                self.update_avail_index()?;
            }
        }

        Ok(())
    }

    pub fn desc_table_addrs(&self) -> Result<DescTableAddrs> {
        Ok(DescTableAddrs {
            desc: self.mem_layout.desc_table.offset(),
            avail: self.mem_layout.avail_ring.offset(),
            used: self.mem_layout.used_ring.offset(),
        })
    }

    /// Returns the IOVA of the buffer for the given `index`.
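    /// For illustration: with the fixed `BUF_SIZE` of 1024, index 3 resolves to
    /// `buffer_addr + 3 * 1024`, and indices wrap modulo the queue size.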
    fn buffer_address(&self, index: Wrapping<u16>) -> Result<IOVA> {
        let offset = u64::from((index % self.size).0) * BUF_SIZE;
        self.mem_layout
            .buffer_addr
            .checked_add(offset)
            .ok_or(anyhow!("overflow txq"))
    }

    /// Writes the given descriptor table entry.
    fn write_desc_entry(&self, index: Wrapping<u16>, desc: Desc) -> Result<()> {
        let addr = self
            .mem_layout
            .desc_table
            .unchecked_add(u64::from((index % self.size).0) * mem::size_of::<Desc>() as u64);
        fence(Ordering::SeqCst);
        self.mem
            .write_obj_at_addr(desc, addr)
            .context("failed to write desc")
    }

    /// Puts an index into the avail ring for use by the host.
    fn update_avail_index(&self) -> Result<()> {
        fence(Ordering::SeqCst);
        self.mem
            .write_obj_at_addr(
                self.avail_idx.0,
                self.mem_layout.avail_ring.unchecked_add(2),
            )
            .context("failed to write avail.idx")?;
        Ok(())
    }

    /// Reads the used ring's index.
    fn read_used_idx(&self) -> Result<Wrapping<u16>> {
        let used_index_addr = self.mem_layout.used_ring.unchecked_add(2);
        fence(Ordering::SeqCst);
        let used_index: u16 = self
            .mem
            .read_obj_from_addr(used_index_addr)
            .context("failed to read used.idx")?;
        Ok(Wrapping(used_index))
    }

    /// Reads the used ring's element for the given index.
    fn read_used_elem(&self, idx: Wrapping<u16>) -> Result<UsedElem> {
        let offset = 4 + (idx % self.size).0 as usize * mem::size_of::<UsedElem>();
        let addr = self
            .mem_layout
            .used_ring
            .checked_add(offset as u64)
            .context("overflow")?;
        fence(Ordering::SeqCst);
        self.mem
            .read_obj_from_addr(addr)
            .context("failed to read used")
    }

    /// Reads data from the virtqueue.
    /// Returns `Ok(None)` if no data are available.
    ///
    /// TODO: Use `descriptor_utils::Reader`.
    pub fn read_data(&mut self) -> Result<Option<VolatileSlice>> {
        if !self.device_writable {
            bail!("driver cannot read device-readable descriptors");
        }

        let idx = self.read_used_idx()?;
        let cur = self.used_count;
        if cur == idx {
            return Ok(None);
        }

        let elem = self.read_used_elem(cur)?;

        let id = Wrapping(u32::from(elem.id) as u16);
        let len = u32::from(elem.len) as usize;

        let addr = self.buffer_address(id)?;

        fence(Ordering::SeqCst);
        let s = self
            .mem
            .get_slice_at_addr(addr, len)
            .context("failed to read data")?;

        self.used_count += Wrapping(1);
        self.avail_idx += Wrapping(1);
        self.update_avail_index()?;
        Ok(Some(s))
    }

    /// Writes data into the virtqueue's buffer and returns its address.
    ///
    /// TODO: Use `descriptor_utils::Writer`.
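    ///
    /// Note that `index` selects the buffer slot to use; `write` below passes the current
    /// `avail_idx` so the data lands in the slot that will be exposed to the device next.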
    fn write_to_buffer(&self, index: Wrapping<u16>, data: &[u8]) -> Result<IOVA> {
        if data.len() as u64 > BUF_SIZE {
            bail!(
                "data size {} is larger than the buffer size {}",
                data.len(),
                BUF_SIZE
            );
        }

        let addr = self.buffer_address(index)?;
        fence(Ordering::SeqCst);
        let written = self
            .mem
            .write_at_addr(data, addr)
            .context("failed to write data")?;
        if written < data.len() {
            bail!(
                "not enough memory: wrote {} bytes, but data length is {}",
                written,
                data.len()
            );
        }
        Ok(addr)
    }

    /// Acknowledges buffers that the device used.
    pub fn ack_used(&mut self) -> Result<()> {
        let used_idx = self.read_used_idx()?;
        let num_used = used_idx - self.used_count;

        self.used_count += num_used;
        self.free_count += num_used;

        Ok(())
    }

    /// Writes the given data to the virtqueue.
    pub fn write(&mut self, data: &[u8]) -> Result<()> {
        if self.device_writable {
            bail!("driver cannot write to device-writable descriptors");
        }

        self.ack_used()?;

        if self.free_count == Wrapping(0) {
            // TODO: wait until the device processes buffers.
            bail!("no avail descriptor is left");
        }

        let addr = self
            .write_to_buffer(self.avail_idx, data)
            .context("failed to write data to virtqueue")?;

        let desc = Desc {
            addr: Le64::from(addr.offset()),
            len: Le32::from(data.len() as u32),
            flags: Le16::from(0),
            next: Le16::from(0),
        };
        self.write_desc_entry(self.avail_idx, desc)?;
        self.free_count -= Wrapping(1);

        self.avail_idx += Wrapping(1);
        self.update_avail_index()?;

        Ok(())
    }
}

#[cfg(test)]
mod test {
    use super::*;

    use std::cell::RefCell;
    use std::io::Read;
    use std::io::Write;

    use crate::virtio::{Queue as DeviceQueue, Reader, Writer};

    // An allocator that just allocates 0 as an IOVA.
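    // The inner `RefCell<bool>` records whether the single region has been handed out, so a
    // second `alloc_iova` call fails and `map_iova` fails if called before allocation.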
    struct SimpleIovaAllocator(RefCell<bool>);

    impl IovaAllocator for SimpleIovaAllocator {
        fn alloc_iova(&self, _size: u64, _tag: u8) -> Result<u64> {
            if *self.0.borrow() {
                bail!("exhausted");
            }
            *self.0.borrow_mut() = true;
            Ok(0)
        }

        unsafe fn map_iova(&self, _iova: u64, _size: u64, _addr: *const u8) -> Result<()> {
            if !*self.0.borrow() {
                bail!("not allocated");
            }
            Ok(())
        }
    }

    fn setup_vq(queue: &mut DeviceQueue, addrs: DescTableAddrs) {
        queue.desc_table = IOVA(addrs.desc);
        queue.avail_ring = IOVA(addrs.avail);
        queue.used_ring = IOVA(addrs.used);
        queue.ready = true;
    }

    fn device_write(mem: &QueueMemory, q: &mut DeviceQueue, data: &[u8]) -> usize {
        let desc_chain = q.pop(mem).unwrap();
        let index = desc_chain.index;

        let mut writer = Writer::new(mem.clone(), desc_chain).unwrap();
        let written = writer.write(data).unwrap();
        q.add_used(mem, index, written as u32);
        written
    }

    fn device_read(mem: &QueueMemory, q: &mut DeviceQueue, len: usize) -> Vec<u8> {
        let desc_chain = q.pop(mem).unwrap();
        let desc_index = desc_chain.index;
        let mut reader = Reader::new(mem.clone(), desc_chain).unwrap();
        let mut buf = vec![0; len];
        reader.read_exact(&mut buf).unwrap();
        q.add_used(mem, desc_index, len as u32);
        buf
    }

    fn driver_read(q: &mut UserQueue) -> Vec<u8> {
        let data = q.read_data().unwrap().unwrap();
        let mut buf = vec![0; data.size()];
        data.copy_to(&mut buf);

        buf
    }

    fn driver_write(q: &mut UserQueue, data: &[u8]) {
        q.write(data).unwrap()
    }

    // Send an array from the driver to the device `count` times.
    fn drv_to_dev(queue_size: u16, count: u32) {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, false /* device_writable */, 0, &iova_alloc).unwrap();
        let mut dev_queue = DeviceQueue::new(queue_size);
        setup_vq(&mut dev_queue, drv_queue.desc_table_addrs().unwrap());

        for i in 0..count {
            let input = vec![(i + 1) as u8; 5];
            driver_write(&mut drv_queue, &input);

            let buf = device_read(&drv_queue.mem, &mut dev_queue, input.len());
            assert_eq!(input, buf);
            assert!(dev_queue.peek(&drv_queue.mem).is_none());
        }
    }

    #[test]
    fn test_driver_write() {
        let queue_size = 256;
        let iteration = 20;
        drv_to_dev(queue_size, iteration);
    }

    #[test]
    fn test_driver_write_small_queue() {
        // Test with a small queue.
        let queue_size = 8;
        let iteration = 20;
        drv_to_dev(queue_size, iteration);
    }

    // This test loops (65536 + 20) times. To avoid running it on slow, emulated CI environments,
    // it is restricted to specific target architectures.
    // TODO(keiichiw): Change the test to mutate queues' internal state to avoid the actual loop.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_driver_write_wrapping() {
        // Test that the index wraps around correctly when the iteration count exceeds 16 bits.
        let queue_size = 256;

        let iteration = u32::from(u16::MAX) + 20;
        drv_to_dev(queue_size, iteration);
    }

    // Send an array from the device to the driver `count` times.
    fn dev_to_drv(queue_size: u16, count: u32) {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, true /* device_writable */, 0, &iova_alloc).unwrap();
        let mut dev_queue = DeviceQueue::new(queue_size);
        setup_vq(&mut dev_queue, drv_queue.desc_table_addrs().unwrap());

        for i in 0..count {
            let input = [i as u8; 5];

            // Device writes data to the driver.
            let written = device_write(&drv_queue.mem, &mut dev_queue, &input);
            assert_eq!(written, input.len());

            // Driver reads the data.
            let buf = driver_read(&mut drv_queue);
            assert_eq!(buf, input);
        }
    }

    #[test]
    fn test_driver_read() {
        let queue_size = 256;
        let iteration = 20;
        dev_to_drv(queue_size, iteration);
    }

    #[test]
    fn test_driver_read_small_queue() {
        // Test with a small queue.
        let queue_size = 8;
        let iteration = 20;
        dev_to_drv(queue_size, iteration);
    }

    // This test loops (65536 + 20) times. To avoid running it on slow, emulated CI environments,
    // it is restricted to specific target architectures.
    // TODO(keiichiw): Change the test to mutate queues' internal state to avoid the actual loop.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_driver_read_wrapping() {
        // Test that the index wraps around correctly when the iteration count exceeds 16 bits.
        let queue_size = 256;
        let iteration = u32::from(u16::MAX) + 20;
        dev_to_drv(queue_size, iteration);
    }
}