// Copyright 2021 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Implement the driver side of virtio queue handling.
//! The virtqueue struct is expected to be used in userspace VFIO virtio drivers.

use std::mem;
use std::num::Wrapping;
use std::sync::atomic::fence;
use std::sync::atomic::Ordering;

use anyhow::anyhow;
use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use data_model::Le16;
use data_model::Le32;
use data_model::Le64;
use data_model::VolatileSlice;
use virtio_sys::virtio_ring::VRING_DESC_F_WRITE;
use vm_memory::GuestAddress as IOVA;
use vm_memory::GuestMemory as QueueMemory;
use zerocopy::FromBytes;

use crate::virtio::Desc;

#[derive(Copy, Clone, Debug, FromBytes)]
#[repr(C)]
struct UsedElem {
    id: Le32,
    len: Le32,
}

const BUF_SIZE: u64 = 1024;

pub struct DescTableAddrs {
    pub desc: u64,
    pub avail: u64,
    pub used: u64,
}

struct MemLayout {
    /// Address of the descriptor table in `UserQueue::mem`.
    desc_table: IOVA,

    /// Address of the available ring in `UserQueue::mem`.
    avail_ring: IOVA,

    /// Address of the used ring in `UserQueue::mem`.
    used_ring: IOVA,

    /// Address of the start of buffers in `UserQueue::mem`.
    buffer_addr: IOVA,
}

/// Represents a virtqueue that is allocated in the guest userspace and manipulated from a VFIO
/// driver.
///
/// This struct is similar to `devices::virtio::Queue`, which is designed for virtio devices;
/// this struct is defined for virtio drivers.
///
/// # Memory Layout
///
/// `mem` is the memory allocated in the guest userspace for the virtqueue, which is mapped into
/// the vvu device via VFIO. The GuestAddresses of `mem` are the IOVAs that should be used when
/// communicating with the vvu device. All accesses to the shared memory from the device backend
/// must be done through the GuestMemory read/write functions.
///
/// The layout of `mem` is defined in the following table and stored in `mem_layout`.
///
/// | Region           | Alignment     | Size                         |
/// |------------------|---------------|------------------------------|
/// | Descriptor Table | 16            | 16 * (Queue Size)            |
/// | Available Ring   | 2             | 6 + 2 * (Queue Size)         |
/// | Used Ring        | 4             | 6 + 8 * (Queue Size)         |
/// | Buffers          | (Buffer Size) | (Buffer Size) * (Queue Size) |
///
/// A worked example of these offsets for a queue of size 8 is checked by the `layout_sketch`
/// test module at the bottom of this file.
///
/// TODO(b/207364742): Once we support `VIRTIO_F_EVENT_IDX`, the additional 2 bytes for the
/// `used_event` field will be added.
/// TODO(b/215153367): Use `crate::virtio::Queue` as an underlying data structure so that we can
/// use `descriptor_utils::{Reader, Writer}` instead of having our own read/write methods.
/// One of the biggest blockers is that `virtio::Queue` is designed for the device-side virtqueue,
/// where readable/writable areas are inverted from our use case.
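/// # Example
///
/// A minimal sketch of the driver-side flow, marked `ignore` so it is not compiled as a doctest;
/// `iova_alloc` stands in for any `IovaAllocator` implementation and is not defined in this
/// module:
///
/// ```ignore
/// // Create a device-readable queue with 256 descriptors.
/// let mut queue = UserQueue::new(256, false /* device_writable */, 0, &iova_alloc)?;
/// // Share the ring IOVAs with the vvu device.
/// let DescTableAddrs { desc, avail, used } = queue.desc_table_addrs()?;
/// // Enqueue driver data; the device consumes it from the available ring.
/// queue.write(b"data")?;
/// // A queue created with `device_writable = true` would instead receive data from the device
/// // via `queue.read_data()?`.
/// ```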
pub struct UserQueue {
    /// The queue size.
    size: Wrapping<u16>,

    /// The underlying memory.
    mem: QueueMemory,

    /// Virtqueue layout on `mem`.
    mem_layout: MemLayout,

    avail_idx: Wrapping<u16>,

    used_count: Wrapping<u16>,
    free_count: Wrapping<u16>,

    /// Whether buffers are device-writable or device-readable.
    /// If true, every descriptor has the VIRTQ_DESC_F_WRITE flag.
    /// TODO(b/215153358, b/215153367): Since VIRTQ_DESC_F_WRITE is a per-descriptor flag, this
    /// design is specific to the current vvu specification draft, where a device-writable queue
    /// and a device-readable queue are separated.
    /// Ideally, we should update the vvu spec to use both device-{readable, writable} buffers in
    /// one virtqueue. Also, it's better to use `crate::virtio::DescriptorChain` for descriptors
    /// as a part of b/215153367.
    device_writable: bool,
}

/// Interface used by UserQueue to interact with the IOMMU.
pub trait IovaAllocator {
    /// Allocates an IO virtual address region of the requested size.
    fn alloc_iova(&self, size: u64, tag: u8) -> Result<u64>;

    /// Maps the given address at the given IOVA.
    ///
    /// # Safety
    ///
    /// `addr` must reference a region of at least length `size`. Memory passed
    /// to this function may be mutated at any time, so `addr` must not be memory
    /// that is directly managed by rust.
    unsafe fn map_iova(&self, iova: u64, size: u64, addr: *const u8) -> Result<()>;
}

impl UserQueue {
    /// Creates a `UserQueue` instance.
    pub fn new<I>(queue_size: u16, device_writable: bool, tag: u8, iova_alloc: &I) -> Result<Self>
    where
        I: IovaAllocator,
    {
        let (mem, size, mem_layout) = Self::init_memory(queue_size, tag, iova_alloc)?;

        let mut queue = Self {
            mem,
            size: Wrapping(size),
            mem_layout,
            avail_idx: Wrapping(0),
            used_count: Wrapping(0),
            free_count: Wrapping(size),
            device_writable,
        };

        queue.init_descriptor_table()?;

        Ok(queue)
    }

    /// Allocates a memory region and returns the addresses within it of
    /// (`desc_table`, `avail_ring`, `used_ring`, `buffer`).
    fn init_memory<I>(
        max_queue_size: u16,
        tag: u8,
        iova_alloc: &I,
    ) -> Result<(QueueMemory, u16, MemLayout)>
    where
        I: IovaAllocator,
    {
        // vhost-user negotiation finishes within ~20 messages, so even a modest queue size is
        // plenty.
        const MAX_QUEUE_SIZE: u16 = 256;

        let queue_size = std::cmp::min(MAX_QUEUE_SIZE, max_queue_size);
        if queue_size == 0 || !queue_size.is_power_of_two() {
            bail!(
                "queue_size must be a positive power of 2, but got {}",
                queue_size
            );
        }

        fn align(n: u64, m: u64) -> u64 {
            ((n + m - 1) / m) * m
        }

        let desc_table = IOVA(0);
        let desc_size = 16u64 * u64::from(queue_size);
        let desc_end = desc_table.0 + desc_size;

        let avail_ring = IOVA(align(desc_end, 2));
        let avail_size = 6 + 2 * u64::from(queue_size);
        let avail_end = avail_ring.0 + avail_size;

        let used_ring = IOVA(align(avail_end, 4));
        let used_size = 6 + 8 * u64::from(queue_size);
        let used_end = used_ring.0 + used_size;

        let buffer_addr = IOVA(align(used_end, BUF_SIZE));
        let buffer_size = BUF_SIZE * u64::from(queue_size);

        let mem_size = align(buffer_addr.0 + buffer_size, base::pagesize() as u64);
        let iova_start = iova_alloc
            .alloc_iova(mem_size, tag)
            .context("failed to allocate queue iova")?;

        let mem = QueueMemory::new(&[(IOVA(iova_start), mem_size)])
            .map_err(|e| anyhow!("failed to create QueueMemory for virtqueue: {}", e))?;

        let host_addr = mem
            .get_host_address_range(IOVA(iova_start), mem_size as usize)
            .context("failed to get host address")?;
        // Safe because the region being mapped is managed via the GuestMemory interface.
        unsafe {
            iova_alloc
                .map_iova(iova_start, mem_size, host_addr)
                .context("failed to map queue")?;
        }

        let mem_layout = MemLayout {
            desc_table: desc_table.unchecked_add(iova_start),
            avail_ring: avail_ring.unchecked_add(iova_start),
            used_ring: used_ring.unchecked_add(iova_start),
            buffer_addr: buffer_addr.unchecked_add(iova_start),
        };

        Ok((mem, queue_size, mem_layout))
    }

    /// Initializes the descriptor table.
    fn init_descriptor_table(&mut self) -> Result<()> {
        let flags = if self.device_writable {
            Le16::from(VRING_DESC_F_WRITE as u16)
        } else {
            Le16::from(0)
        };
        let len = Le32::from(BUF_SIZE as u32);
        let next = Le16::from(0);

        // Register pre-allocated buffers to the descriptor area.
        for i in 0..self.size.0 {
            let idx = Wrapping(i);
            let iova = self.buffer_address(idx)?.offset();
            let desc = Desc {
                addr: iova.into(),
                len,
                flags,
                next,
            };
            self.write_desc_entry(idx, desc)
                .map_err(|e| anyhow!("failed to write {}-th desc: {}", idx, e))?;

            fence(Ordering::SeqCst);
            self.mem
                .write_obj_at_addr(
                    idx.0,
                    self.mem_layout
                        .avail_ring
                        .unchecked_add(u64::from(4 + 2 * i)),
                )
                .context("failed to write avail ring")?;
        }

        // If all of `self`'s buffers are device-writable, expose them to the device.
        if self.device_writable {
            for _ in 0..self.size.0 {
                // TODO(keiichiw): avail_idx should be incremented in update_avail_index.
                self.avail_idx += Wrapping(1);
                self.update_avail_index()?;
            }
        }

        Ok(())
    }

    pub fn desc_table_addrs(&self) -> Result<DescTableAddrs> {
        Ok(DescTableAddrs {
            desc: self.mem_layout.desc_table.offset(),
            avail: self.mem_layout.avail_ring.offset(),
            used: self.mem_layout.used_ring.offset(),
        })
    }

    /// Returns the IOVA of the buffer for the given `index`.
    fn buffer_address(&self, index: Wrapping<u16>) -> Result<IOVA> {
        let offset = u64::from((index % self.size).0) * BUF_SIZE;
        self.mem_layout
            .buffer_addr
            .checked_add(offset)
            .ok_or(anyhow!("buffer address overflow"))
    }

    /// Writes the given descriptor table entry.
    fn write_desc_entry(&self, index: Wrapping<u16>, desc: Desc) -> Result<()> {
        let addr = self
            .mem_layout
            .desc_table
            .unchecked_add(u64::from((index % self.size).0) * mem::size_of::<Desc>() as u64);
        fence(Ordering::SeqCst);
        self.mem
            .write_obj_at_addr(desc, addr)
            .context("failed to write desc")
    }

    /// Puts an index into the avail ring for use by the device.
    fn update_avail_index(&self) -> Result<()> {
        fence(Ordering::SeqCst);
        self.mem
            .write_obj_at_addr(
                self.avail_idx.0,
                self.mem_layout.avail_ring.unchecked_add(2),
            )
            .context("failed to write avail.idx")?;
        Ok(())
    }

    /// Reads the used ring's index.
    fn read_used_idx(&self) -> Result<Wrapping<u16>> {
        let used_index_addr = self.mem_layout.used_ring.unchecked_add(2);
        fence(Ordering::SeqCst);
        let used_index: u16 = self
            .mem
            .read_obj_from_addr(used_index_addr)
            .context("failed to read used.idx")?;
        Ok(Wrapping(used_index))
    }

    /// Reads the used ring's element for the given index.
    fn read_used_elem(&self, idx: Wrapping<u16>) -> Result<UsedElem> {
        let offset = 4 + (idx % self.size).0 as usize * mem::size_of::<UsedElem>();
        let addr = self
            .mem_layout
            .used_ring
            .checked_add(offset as u64)
            .context("used ring address overflow")?;
        fence(Ordering::SeqCst);
        self.mem
            .read_obj_from_addr(addr)
            .context("failed to read used element")
    }

    /// Reads data in the virtqueue.
    /// Returns `Ok(None)` if no data is available.
    ///
    /// TODO: Use `descriptor_utils::Reader`.
    pub fn read_data(&mut self) -> Result<Option<VolatileSlice>> {
        if !self.device_writable {
            bail!("driver cannot read device-readable descriptors");
        }

        let idx = self.read_used_idx()?;
        let cur = self.used_count;
        if cur == idx {
            return Ok(None);
        }

        let elem = self.read_used_elem(cur)?;

        let id = Wrapping(u32::from(elem.id) as u16);
        let len = u32::from(elem.len) as usize;

        let addr = self.buffer_address(id)?;

        fence(Ordering::SeqCst);
        let s = self
            .mem
            .get_slice_at_addr(addr, len)
            .context("failed to read data")?;

        self.used_count += Wrapping(1);
        self.avail_idx += Wrapping(1);
        self.update_avail_index()?;
        Ok(Some(s))
    }

    /// Writes data into the virtqueue's buffer and returns its address.
    ///
    /// TODO: Use `descriptor_utils::Writer`.
    fn write_to_buffer(&self, index: Wrapping<u16>, data: &[u8]) -> Result<IOVA> {
        if data.len() as u64 > BUF_SIZE {
            bail!(
                "data size {} is larger than the buffer size {}",
                data.len(),
                BUF_SIZE
            );
        }

        let addr = self.buffer_address(index)?;
        fence(Ordering::SeqCst);
        let written = self
            .mem
            .write_at_addr(data, addr)
            .context("failed to write data")?;
        if written < data.len() {
            bail!(
                "not enough memory: wrote {} bytes, but data length is {}",
                written,
                data.len()
            );
        }
        Ok(addr)
    }

    /// Acknowledges buffers that the device used.
    pub fn ack_used(&mut self) -> Result<()> {
        let used_idx = self.read_used_idx()?;
        let num_used = used_idx - self.used_count;

        self.used_count += num_used;
        self.free_count += num_used;

        Ok(())
    }

    /// Writes the given data to the virtqueue.
    pub fn write(&mut self, data: &[u8]) -> Result<()> {
        if self.device_writable {
            bail!("driver cannot write to device-writable descriptors");
        }

        self.ack_used()?;

        if self.free_count == Wrapping(0) {
            // TODO: wait until the device processes buffers.
            bail!("no available descriptor is left");
        }

        let addr = self
            .write_to_buffer(self.avail_idx, data)
            .context("failed to write data to virtqueue")?;

        let desc = Desc {
            addr: Le64::from(addr.offset()),
            len: Le32::from(data.len() as u32),
            flags: Le16::from(0),
            next: Le16::from(0),
        };
        self.write_desc_entry(self.avail_idx, desc)?;
        self.free_count -= Wrapping(1);

        self.avail_idx += Wrapping(1);
        self.update_avail_index()?;

        Ok(())
    }
}

#[cfg(test)]
mod test {
    use std::cell::RefCell;
    use std::io::Read;
    use std::io::Write;

    use super::*;
    use crate::virtio::Queue as DeviceQueue;
    use crate::virtio::Reader;
    use crate::virtio::Writer;
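
    // Editorial sketch, not part of the original suite: exercises the `free_count` bookkeeping in
    // `UserQueue::write` by filling every descriptor while the device consumes nothing; the next
    // write must then fail. Uses the `SimpleIovaAllocator` fixture defined just below.
    #[test]
    fn test_write_full_queue() {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let queue_size = 8;
        let mut drv_queue =
            UserQueue::new(queue_size, false /* device_writable */, 0, &iova_alloc).unwrap();

        // Use up every descriptor without letting the device pop anything.
        for i in 0..queue_size {
            drv_queue.write(&[i as u8; 5]).unwrap();
        }
        // With no free descriptor left, a further write is expected to fail.
        assert!(drv_queue.write(&[0xff; 5]).is_err());
    }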

    // An allocator that just allocates 0 as an IOVA.
    struct SimpleIovaAllocator(RefCell<bool>);

    impl IovaAllocator for SimpleIovaAllocator {
        fn alloc_iova(&self, _size: u64, _tag: u8) -> Result<u64> {
            if *self.0.borrow() {
                bail!("exhausted");
            }
            *self.0.borrow_mut() = true;
            Ok(0)
        }

        unsafe fn map_iova(&self, _iova: u64, _size: u64, _addr: *const u8) -> Result<()> {
            if !*self.0.borrow() {
                bail!("not allocated");
            }
            Ok(())
        }
    }

    fn setup_vq(queue: &mut DeviceQueue, addrs: DescTableAddrs) {
        queue.set_desc_table(IOVA(addrs.desc));
        queue.set_avail_ring(IOVA(addrs.avail));
        queue.set_used_ring(IOVA(addrs.used));
        queue.set_ready(true);
    }

    fn device_write(mem: &QueueMemory, q: &mut DeviceQueue, data: &[u8]) -> usize {
        let desc_chain = q.pop(mem).unwrap();
        let index = desc_chain.index;

        let mut writer = Writer::new(mem.clone(), desc_chain).unwrap();
        let written = writer.write(data).unwrap();
        q.add_used(mem, index, written as u32);
        written
    }

    fn device_read(mem: &QueueMemory, q: &mut DeviceQueue, len: usize) -> Vec<u8> {
        let desc_chain = q.pop(mem).unwrap();
        let desc_index = desc_chain.index;
        let mut reader = Reader::new(mem.clone(), desc_chain).unwrap();
        let mut buf = vec![0; len];
        reader.read_exact(&mut buf).unwrap();
        q.add_used(mem, desc_index, len as u32);
        buf
    }

    fn driver_read(q: &mut UserQueue) -> Vec<u8> {
        let data = q.read_data().unwrap().unwrap();
        let mut buf = vec![0; data.size()];
        data.copy_to(&mut buf);

        buf
    }

    fn driver_write(q: &mut UserQueue, data: &[u8]) {
        q.write(data).unwrap()
    }

    // Send an array from the driver to the device `count` times.
    fn drv_to_dev(queue_size: u16, count: u32) {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, false /* device_writable */, 0, &iova_alloc).unwrap();
        let mut dev_queue = DeviceQueue::new(queue_size);
        setup_vq(&mut dev_queue, drv_queue.desc_table_addrs().unwrap());

        for i in 0..count {
            let input = vec![(i + 1) as u8; 5];
            driver_write(&mut drv_queue, &input);

            let buf = device_read(&drv_queue.mem, &mut dev_queue, input.len());
            assert_eq!(input, buf);
            assert!(dev_queue.peek(&drv_queue.mem).is_none());
        }
    }

    #[test]
    fn test_driver_write() {
        let queue_size = 256;
        let iteration = 20;
        drv_to_dev(queue_size, iteration);
    }

    #[test]
    fn test_driver_write_small_queue() {
        // Test with a small queue.
        let queue_size = 8;
        let iteration = 20;
        drv_to_dev(queue_size, iteration);
    }

    // This test loops (65536 + 20) times. To avoid running it on slow emulated CI environments,
    // specify target architecture.
    // TODO(keiichiw): Change the test to mutate queues' internal state to avoid the actual loop.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_driver_write_wrapping() {
        // Test that the index wraps around when the iteration count exceeds 16 bits.
        let queue_size = 256;

        let iteration = u32::from(u16::MAX) + 20;
        drv_to_dev(queue_size, iteration);
    }

    // Send an array from the device to the driver `count` times.
    fn dev_to_drv(queue_size: u16, count: u32) {
        let iova_alloc = SimpleIovaAllocator(RefCell::new(false));
        let mut drv_queue =
            UserQueue::new(queue_size, true /* device_writable */, 0, &iova_alloc).unwrap();
        let mut dev_queue = DeviceQueue::new(queue_size);
        setup_vq(&mut dev_queue, drv_queue.desc_table_addrs().unwrap());

        for i in 0..count {
            let input = [i as u8; 5];

            // Device writes data to the driver.
            let written = device_write(&drv_queue.mem, &mut dev_queue, &input);
            assert_eq!(written, input.len());

            // Driver reads the data.
            let buf = driver_read(&mut drv_queue);
            assert_eq!(buf, input);
        }
    }

    #[test]
    fn test_driver_read() {
        let queue_size = 256;
        let iteration = 20;
        dev_to_drv(queue_size, iteration);
    }

    #[test]
    fn test_driver_read_small_queue() {
        // Test with a small queue.
        let queue_size = 8;
        let iteration = 20;
        dev_to_drv(queue_size, iteration);
    }

    // This test loops (65536 + 20) times. To avoid running it on slow emulated CI environments,
    // specify target architecture.
    // TODO(keiichiw): Change the test to mutate queues' internal state to avoid the actual loop.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_driver_read_wrapping() {
        // Test that the index wraps around when the iteration count exceeds 16 bits.
        let queue_size = 256;
        let iteration = u32::from(u16::MAX) + 20;
        dev_to_drv(queue_size, iteration);
    }
}
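
// The module below is an editorial sketch, not part of the original test suite. It walks through
// the layout arithmetic documented on `UserQueue`: for a queue of size 8, the descriptor table
// occupies 16 * 8 = 128 bytes at IOVA 0, the available ring (6 + 2 * 8 = 22 bytes) starts right
// after it at offset 128, and the used ring starts at the next 4-byte boundary after
// 128 + 22 = 150, i.e. at offset 152. `ZeroIovaAllocator` is an ad-hoc helper that simply hands
// out IOVA 0, mirroring `SimpleIovaAllocator` above.
#[cfg(test)]
mod layout_sketch {
    use super::*;

    struct ZeroIovaAllocator;

    impl IovaAllocator for ZeroIovaAllocator {
        fn alloc_iova(&self, _size: u64, _tag: u8) -> Result<u64> {
            Ok(0)
        }

        unsafe fn map_iova(&self, _iova: u64, _size: u64, _addr: *const u8) -> Result<()> {
            Ok(())
        }
    }

    #[test]
    fn test_layout_offsets_for_queue_size_8() {
        let queue =
            UserQueue::new(8, false /* device_writable */, 0, &ZeroIovaAllocator).unwrap();
        let addrs = queue.desc_table_addrs().unwrap();
        assert_eq!(addrs.desc, 0);
        assert_eq!(addrs.avail, 128);
        assert_eq!(addrs.used, 152);
    }
}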