// Copyright 2017 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::num::Wrapping;
use std::sync::atomic::fence;
use std::sync::atomic::Ordering;
use std::sync::Arc;

use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use base::error;
use base::warn;
use base::Protection;
use cros_async::AsyncError;
use cros_async::EventAsync;
use data_model::Le16;
use data_model::Le32;
use data_model::Le64;
use smallvec::smallvec;
use smallvec::SmallVec;
use sync::Mutex;
use virtio_sys::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use zerocopy::AsBytes;
use zerocopy::FromBytes;

use super::SignalableInterrupt;
use super::VIRTIO_MSI_NO_VECTOR;
use crate::virtio::ipc_memory_mapper::ExportedRegion;
use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
use crate::virtio::memory_mapper::MemRegion;
use crate::virtio::memory_util::read_obj_from_addr_wrapper;
use crate::virtio::memory_util::write_obj_at_addr_wrapper;

const VIRTQ_DESC_F_NEXT: u16 = 0x1;
const VIRTQ_DESC_F_WRITE: u16 = 0x2;
#[allow(dead_code)]
const VIRTQ_DESC_F_INDIRECT: u16 = 0x4;

#[allow(dead_code)]
const VIRTQ_USED_F_NO_NOTIFY: u16 = 0x1;
#[allow(dead_code)]
const VIRTQ_AVAIL_F_NO_INTERRUPT: u16 = 0x1;

/// An iterator over a single descriptor chain. Not to be confused with AvailIter,
/// which iterates over the descriptor chain heads in a queue.
pub struct DescIter {
    next: Option<DescriptorChain>,
}

impl DescIter {
    /// Returns an iterator that only yields the readable descriptors in the chain.
    pub fn readable(self) -> impl Iterator<Item = DescriptorChain> {
        self.take_while(DescriptorChain::is_read_only)
    }

    /// Returns an iterator that only yields the writable descriptors in the chain.
    pub fn writable(self) -> impl Iterator<Item = DescriptorChain> {
        self.skip_while(DescriptorChain::is_read_only)
    }
}

impl Iterator for DescIter {
    type Item = DescriptorChain;

    fn next(&mut self) -> Option<Self::Item> {
        if let Some(current) = self.next.take() {
            self.next = current.next_descriptor();
            Some(current)
        } else {
            None
        }
    }
}

/// A virtio descriptor chain.
#[derive(Clone)]
pub struct DescriptorChain {
    mem: GuestMemory,
    desc_table: GuestAddress,
    queue_size: u16,
    ttl: u16, // used to prevent infinite chain cycles

    /// Index into the descriptor table
    pub index: u16,

    /// Guest physical address of device specific data, or IO virtual address
    /// if iommu is used
    pub addr: GuestAddress,

    /// Length of device specific data
    pub len: u32,

    /// Includes next, write, and indirect bits
    pub flags: u16,

    /// Index into the descriptor table of the next descriptor if flags has
    /// the next bit set
    pub next: u16,

    /// The memory regions associated with the current descriptor.
    regions: SmallVec<[MemRegion; 1]>,

    /// Translates `addr` to guest physical address
    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    /// The exported descriptor table of this chain's queue. Present
    /// iff iommu is present.
    exported_desc_table: Option<ExportedRegion>,

    /// The exported iommu region of the current descriptor. Present iff
    /// iommu is present.
    exported_region: Option<ExportedRegion>,
}

#[derive(Copy, Clone, Debug, FromBytes, AsBytes)]
#[repr(C)]
pub struct Desc {
    pub addr: Le64,
    pub len: Le32,
    pub flags: Le16,
    pub next: Le16,
}

impl DescriptorChain {
    pub(crate) fn checked_new(
        mem: &GuestMemory,
        desc_table: GuestAddress,
        queue_size: u16,
        index: u16,
        required_flags: u16,
        iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,
        exported_desc_table: Option<ExportedRegion>,
    ) -> Result<DescriptorChain> {
        if index >= queue_size {
            bail!("index ({}) >= queue_size ({})", index, queue_size);
        }

        let desc_head = desc_table
            .checked_add((index as u64) * 16)
            .context("integer overflow")?;
        let desc: Desc = read_obj_from_addr_wrapper(mem, &exported_desc_table, desc_head)
            .with_context(|| format!("failed to read desc {:x}", desc_head.offset()))?;

        let addr = GuestAddress(desc.addr.into());
        let len = desc.len.to_native();
        let (regions, exported_region) = if let Some(iommu) = &iommu {
            if exported_desc_table.is_none() {
                bail!("missing exported descriptor table");
            }

            let exported_region =
                ExportedRegion::new(mem, iommu.clone(), addr.offset(), len.into())
                    .context("failed to get mem regions")?;

            let regions = exported_region.get_mem_regions();
            let required_prot = if required_flags & VIRTQ_DESC_F_WRITE == 0 {
                Protection::read()
            } else {
                Protection::write()
            };
            for r in &regions {
                if !r.prot.allows(&required_prot) {
                    bail!("missing RW permissions for descriptor");
                }
            }

            (regions, Some(exported_region))
        } else {
            (
                smallvec![MemRegion {
                    gpa: addr,
                    len: len.into(),
                    prot: Protection::read_write(),
                }],
                None,
            )
        };

        let chain = DescriptorChain {
            mem: mem.clone(),
            desc_table,
            queue_size,
            ttl: queue_size,
            index,
            addr,
            len,
            flags: desc.flags.into(),
            next: desc.next.into(),
            iommu,
            regions,
            exported_region,
            exported_desc_table,
        };

        if chain.is_valid() && chain.flags & required_flags == required_flags {
            Ok(chain)
        } else {
            bail!("chain is invalid")
        }
    }

    pub fn into_mem_regions(self) -> (SmallVec<[MemRegion; 1]>, Option<ExportedRegion>) {
        (self.regions, self.exported_region)
    }

    fn is_valid(&self) -> bool {
        if self.len > 0 {
            // Each region in `self.regions` must be a contiguous range in `self.mem`.
            if !self
                .regions
                .iter()
                .all(|r| self.mem.is_valid_range(r.gpa, r.len as u64))
            {
                return false;
            }
        }

        !self.has_next() || self.next < self.queue_size
    }

    /// Returns true if this descriptor chain has another descriptor linked after it.
    pub fn has_next(&self) -> bool {
        self.flags & VIRTQ_DESC_F_NEXT != 0 && self.ttl > 1
    }

    /// If the driver designated this as a write only descriptor.
    ///
    /// If this is false, this descriptor is read only.
    /// Write only means the emulated device can write and the driver can read.
    pub fn is_write_only(&self) -> bool {
        self.flags & VIRTQ_DESC_F_WRITE != 0
    }

    /// If the driver designated this as a read only descriptor.
    ///
    /// If this is false, this descriptor is write only.
    /// Read only means the emulated device can read and the driver can write.
    pub fn is_read_only(&self) -> bool {
        self.flags & VIRTQ_DESC_F_WRITE == 0
    }

    /// Gets the next descriptor in this descriptor chain, if there is one.
    ///
    /// Note that this is distinct from the next descriptor chain returned by `AvailIter`, which is
    /// the head of the next _available_ descriptor chain.
    pub fn next_descriptor(&self) -> Option<DescriptorChain> {
        if self.has_next() {
            // Once we see a write-only descriptor, all subsequent descriptors must be write-only.
            let required_flags = self.flags & VIRTQ_DESC_F_WRITE;
            let iommu = self.iommu.as_ref().map(Arc::clone);
            match DescriptorChain::checked_new(
                &self.mem,
                self.desc_table,
                self.queue_size,
                self.next,
                required_flags,
                iommu,
                self.exported_desc_table.clone(),
            ) {
                Ok(mut c) => {
                    c.ttl = self.ttl - 1;
                    Some(c)
                }
                Err(e) => {
                    error!("{:#}", e);
                    None
                }
            }
        } else {
            None
        }
    }

    /// Produces an iterator over all the descriptors in this chain.
    pub fn into_iter(self) -> DescIter {
        DescIter { next: Some(self) }
    }
}

/// Consuming iterator over all available descriptor chain heads in the queue.
pub struct AvailIter<'a, 'b> {
    mem: &'a GuestMemory,
    queue: &'b mut Queue,
}

impl<'a, 'b> Iterator for AvailIter<'a, 'b> {
    type Item = DescriptorChain;

    fn next(&mut self) -> Option<Self::Item> {
        self.queue.pop(self.mem)
    }
}

/// A virtio queue's parameters.
pub struct Queue {
    /// Whether this queue has already been activated.
    activated: bool,

    /// The maximal size in elements offered by the device
    max_size: u16,

    /// The queue size in elements the driver selected. This is always guaranteed to be a power of
    /// two less than or equal to `max_size`, as required for split virtqueues. These invariants
    /// are enforced by `set_size()`.
    size: u16,

    /// Indicates if the queue is finished with configuration
    ready: bool,

    /// MSI-X vector for the queue. Don't care for INTx
    vector: u16,

    /// Guest physical address of the descriptor table
    desc_table: GuestAddress,

    /// Guest physical address of the available ring
    avail_ring: GuestAddress,

    /// Guest physical address of the used ring
    used_ring: GuestAddress,

    pub next_avail: Wrapping<u16>,
    pub next_used: Wrapping<u16>,

    // Device feature bits accepted by the driver
    features: u64,
    last_used: Wrapping<u16>,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // When |iommu| is present, |desc_table| and the rings are IOVAs rather than real
    // GPAs. These are the exported regions used to access the underlying GPAs. They
    // are initialized by |export_memory| and released by |release_exported_memory|.
    exported_desc_table: Option<ExportedRegion>,
    exported_avail_ring: Option<ExportedRegion>,
    exported_used_ring: Option<ExportedRegion>,
}

macro_rules! accessors {
    ($var:ident, $t:ty, $setter:ident) => {
        pub fn $var(&self) -> $t {
            self.$var
        }

        pub fn $setter(&mut self, val: $t) {
            if self.ready {
                warn!("ignoring write to {} on ready queue", stringify!($var));
                return;
            }
            self.$var = val;
        }
    };
}

impl Queue {
    /// Constructs an empty virtio queue with the given `max_size`.
    pub fn new(max_size: u16) -> Queue {
        assert!(max_size.is_power_of_two());
        Queue {
            activated: false,
            max_size,
            size: max_size,
            ready: false,
            vector: VIRTIO_MSI_NO_VECTOR,
            desc_table: GuestAddress(0),
            avail_ring: GuestAddress(0),
            used_ring: GuestAddress(0),
            next_avail: Wrapping(0),
            next_used: Wrapping(0),
            features: 0,
            last_used: Wrapping(0),
            iommu: None,
            exported_desc_table: None,
            exported_avail_ring: None,
            exported_used_ring: None,
        }
    }

    accessors!(vector, u16, set_vector);
    accessors!(desc_table, GuestAddress, set_desc_table);
    accessors!(avail_ring, GuestAddress, set_avail_ring);
    accessors!(used_ring, GuestAddress, set_used_ring);

    /// Return the maximum size of this queue.
    pub fn max_size(&self) -> u16 {
        self.max_size
    }

    /// Return the actual size of the queue, as the driver may not set up a
    /// queue as big as the device allows.
    pub fn size(&self) -> u16 {
        self.size
    }

    /// Set the queue size requested by the driver, which may be smaller than the maximum size.
    pub fn set_size(&mut self, val: u16) {
        if self.ready {
            warn!("ignoring write to queue_size on ready queue");
            return;
        }

        if val > self.max_size || !val.is_power_of_two() {
            warn!(
                "ignoring invalid queue_size {} (max_size {})",
                val, self.max_size,
            );
            return;
        }

        self.size = val;
    }

    /// Return whether the driver has enabled this queue.
    pub fn ready(&self) -> bool {
        self.ready
    }

    /// Signal that the driver has completed queue configuration.
    pub fn set_ready(&mut self, enable: bool) {
        // If the queue is already in the desired state, return early.
        if enable == self.ready {
            return;
        }

        if enable {
            // Validate addresses and queue size to ensure that address calculation won't overflow.
            let ring_sizes = self.ring_sizes();
            let rings = ring_sizes
                .iter()
                .zip(vec!["descriptor table", "available ring", "used ring"]);

            for ((addr, size), name) in rings {
                if addr.checked_add(*size as u64).is_none() {
                    error!(
                        "virtio queue {} goes out of bounds: start:0x{:08x} size:0x{:08x}",
                        name,
                        addr.offset(),
                        size,
                    );
                    return;
                }
            }
        }

        self.ready = enable;
    }
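
    // The expected configuration sequence, typically driven by the transport
    // (for example virtio-pci or virtio-mmio) in response to driver register
    // writes, looks roughly like the sketch below; `driver_size` and the ring
    // addresses stand in for values supplied by the driver:
    //
    //     let mut queue = Queue::new(max_size);
    //     queue.set_size(driver_size);
    //     queue.set_desc_table(GuestAddress(desc_table_addr));
    //     queue.set_avail_ring(GuestAddress(avail_ring_addr));
    //     queue.set_used_ring(GuestAddress(used_ring_addr));
    //     queue.set_ready(true);
    //     let activated_queue = queue.activate()?;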

    /// Convert the queue configuration into an active queue.
    pub fn activate(&mut self) -> Result<Queue> {
        if !self.ready {
            bail!("attempted to activate a non-ready queue");
        }

        if self.activated {
            bail!("queue is already activated");
        }

        self.activated = true;

        let queue = Queue {
            activated: self.activated,
            max_size: self.max_size,
            size: self.size,
            ready: self.ready,
            vector: self.vector,
            desc_table: self.desc_table,
            avail_ring: self.avail_ring,
            used_ring: self.used_ring,
            next_avail: self.next_avail,
            next_used: self.next_used,
            features: self.features,
            last_used: self.last_used,
            iommu: self.iommu.as_ref().map(Arc::clone),
            exported_desc_table: self.exported_desc_table.clone(),
            exported_avail_ring: self.exported_avail_ring.clone(),
            exported_used_ring: self.exported_used_ring.clone(),
        };
        Ok(queue)
    }

    // Return `index` modulo the currently configured queue size.
    fn wrap_queue_index(&self, index: Wrapping<u16>) -> u16 {
        // We know that `self.size` is a power of two (enforced by `set_size()`), so the modulus
        // can be calculated with a bitmask rather than actual division.
        debug_assert!(self.size.is_power_of_two());
        index.0 & (self.size - 1)
    }

    /// Reset queue to a clean state.
    pub fn reset(&mut self) {
        self.activated = false;
        self.ready = false;
        self.size = self.max_size;
        self.vector = VIRTIO_MSI_NO_VECTOR;
        self.desc_table = GuestAddress(0);
        self.avail_ring = GuestAddress(0);
        self.used_ring = GuestAddress(0);
        self.next_avail = Wrapping(0);
        self.next_used = Wrapping(0);
        self.features = 0;
        self.last_used = Wrapping(0);
        self.exported_desc_table = None;
        self.exported_avail_ring = None;
        self.exported_used_ring = None;
    }

    /// Reset queue's counters.
    /// This method doesn't change the queue's metadata so it's reusable without initializing it
    /// again.
    pub fn reset_counters(&mut self) {
        self.next_avail = Wrapping(0);
        self.next_used = Wrapping(0);
        self.last_used = Wrapping(0);
    }
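
    // For reference, the split virtqueue layout behind the size and offset
    // arithmetic below, with N = the configured queue size (the trailing event
    // fields are only meaningful when VIRTIO_RING_F_EVENT_IDX is negotiated):
    //
    //   descriptor table: N entries of 16 bytes each (addr, len, flags, next)
    //   available ring:   flags (2) + idx (2) + ring[N] (2 bytes each) + used_event (2)
    //   used ring:        flags (2) + idx (2) + ring[N] (8 bytes each) + avail_event (2)
    //
    // This is where the `16 * N`, `6 + 2 * N`, and `6 + 8 * N` totals in
    // `ring_sizes()` come from, as well as offsets such as `avail_ring + 2` for
    // the available `idx` field and `used_ring + 4 + 8 * i` for used ring entry `i`.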

    fn ring_sizes(&self) -> Vec<(GuestAddress, usize)> {
        let queue_size = self.size as usize;
        vec![
            (self.desc_table, 16 * queue_size),
            (self.avail_ring, 6 + 2 * queue_size),
            (self.used_ring, 6 + 8 * queue_size),
        ]
    }

    /// If this queue is for a device that sits behind a virtio-iommu device, exports
    /// this queue's memory. After the queue becomes ready, this must be called before
    /// using the queue, to convert the IOVA-based configuration to GuestAddresses.
    pub fn export_memory(&mut self, mem: &GuestMemory) -> Result<()> {
        if !self.ready {
            bail!("not ready");
        }
        if self.exported_desc_table.is_some() {
            bail!("already exported");
        }

        let iommu = self.iommu.as_ref().context("no iommu to export with")?;

        let ring_sizes = self.ring_sizes();
        let rings = ring_sizes.iter().zip(vec![
            &mut self.exported_desc_table,
            &mut self.exported_avail_ring,
            &mut self.exported_used_ring,
        ]);

        for ((addr, size), region) in rings {
            *region = Some(
                ExportedRegion::new(mem, iommu.clone(), addr.offset(), *size as u64)
                    .context("failed to export region")?,
            );
        }
        Ok(())
    }

    /// Releases memory exported by a previous call to [`Queue::export_memory()`].
    pub fn release_exported_memory(&mut self) {
        self.exported_desc_table = None;
        self.exported_avail_ring = None;
        self.exported_used_ring = None;
    }

    // Get the index of the first available descriptor chain in the available ring
    // (the next one that the driver will fill).
    //
    // All available ring entries between `self.next_avail` and `get_avail_index()` are available
    // to be processed by the device.
    fn get_avail_index(&self, mem: &GuestMemory) -> Wrapping<u16> {
        fence(Ordering::SeqCst);

        let avail_index_addr = self.avail_ring.unchecked_add(2);
        let avail_index: u16 =
            read_obj_from_addr_wrapper(mem, &self.exported_avail_ring, avail_index_addr).unwrap();

        Wrapping(avail_index)
    }

    // Set the `avail_event` field in the used ring.
    //
    // This allows the device to inform the driver that driver-to-device notification
    // (kicking the ring) is not necessary until the driver reaches the `avail_index` descriptor.
    //
    // This value is only used if the `VIRTIO_F_EVENT_IDX` feature has been negotiated.
    fn set_avail_event(&mut self, mem: &GuestMemory, avail_index: Wrapping<u16>) {
        fence(Ordering::SeqCst);

        let avail_event_addr = self.used_ring.unchecked_add(4 + 8 * u64::from(self.size));
        write_obj_at_addr_wrapper(
            mem,
            &self.exported_used_ring,
            avail_index.0,
            avail_event_addr,
        )
        .unwrap();
    }

    // Query the value of a single-bit flag in the available ring.
    //
    // Returns `true` if `flag` is currently set (by the driver) in the available ring flags.
    fn get_avail_flag(&self, mem: &GuestMemory, flag: u16) -> bool {
        fence(Ordering::SeqCst);

        let avail_flags: u16 =
            read_obj_from_addr_wrapper(mem, &self.exported_avail_ring, self.avail_ring).unwrap();

        avail_flags & flag == flag
    }

    // Get the `used_event` field in the available ring.
    //
    // The returned value is the index of the next descriptor chain entry for which the driver
    // needs to be notified upon use. Entries before this index may be used without notifying
    // the driver.
    //
    // This value is only valid if the `VIRTIO_F_EVENT_IDX` feature has been negotiated.
    fn get_used_event(&self, mem: &GuestMemory) -> Wrapping<u16> {
        fence(Ordering::SeqCst);

        let used_event_addr = self.avail_ring.unchecked_add(4 + 2 * u64::from(self.size));
        let used_event: u16 =
            read_obj_from_addr_wrapper(mem, &self.exported_avail_ring, used_event_addr).unwrap();

        Wrapping(used_event)
    }

    // Set the `idx` field in the used ring.
    //
    // This indicates to the driver that all entries up to (but not including) `used_index` have
    // been used by the device and may be processed by the driver.
    fn set_used_index(&mut self, mem: &GuestMemory, used_index: Wrapping<u16>) {
        fence(Ordering::SeqCst);

        let used_index_addr = self.used_ring.unchecked_add(2);
        write_obj_at_addr_wrapper(mem, &self.exported_used_ring, used_index.0, used_index_addr)
            .unwrap();
    }

    /// Get the first available descriptor chain without removing it from the queue.
    /// Call `pop_peeked` to remove the returned descriptor chain from the queue.
    pub fn peek(&mut self, mem: &GuestMemory) -> Option<DescriptorChain> {
        if !self.ready {
            error!("attempt to use virtio queue that is not marked ready");
            return None;
        }

        let avail_index = self.get_avail_index(mem);
        if self.next_avail == avail_index {
            return None;
        }

        // This fence ensures that subsequent reads from the descriptor do not
        // get reordered and happen only after fetching the available_index and
        // checking that there is a slot available.
        fence(Ordering::SeqCst);

        let desc_idx_addr_offset = 4 + (u64::from(self.wrap_queue_index(self.next_avail)) * 2);
        let desc_idx_addr = self.avail_ring.checked_add(desc_idx_addr_offset)?;

        // This index is checked below in checked_new.
        let descriptor_index: u16 =
            read_obj_from_addr_wrapper(mem, &self.exported_avail_ring, desc_idx_addr).unwrap();

        let iommu = self.iommu.as_ref().map(Arc::clone);
        DescriptorChain::checked_new(
            mem,
            self.desc_table,
            self.size,
            descriptor_index,
            0,
            iommu,
            self.exported_desc_table.clone(),
        )
        .map_err(|e| {
            error!("{:#}", e);
            e
        })
        .ok()
    }

    /// Remove the first available descriptor chain from the queue.
    /// This function should only be called immediately following `peek`.
    pub fn pop_peeked(&mut self, mem: &GuestMemory) {
        self.next_avail += Wrapping(1);
        if self.features & ((1u64) << VIRTIO_RING_F_EVENT_IDX) != 0 {
            self.set_avail_event(mem, self.next_avail);
        }
    }

    /// If a new descriptor chain head is available, returns it and removes it from the queue.
    pub fn pop(&mut self, mem: &GuestMemory) -> Option<DescriptorChain> {
        let descriptor_chain = self.peek(mem);
        if descriptor_chain.is_some() {
            self.pop_peeked(mem);
        }
        descriptor_chain
    }

    /// A consuming iterator over all available descriptor chain heads offered by the driver.
    pub fn iter<'a, 'b>(&'b mut self, mem: &'a GuestMemory) -> AvailIter<'a, 'b> {
        AvailIter { mem, queue: self }
    }
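
    // A typical device-side processing pass over this API looks roughly like the
    // sketch below (device-specific buffer handling elided; `interrupt` stands in
    // for any `SignalableInterrupt` implementation):
    //
    //     while let Some(chain) = queue.pop(&mem) {
    //         let index = chain.index;
    //         let mut bytes_written = 0;
    //         for desc in chain.into_iter().writable() {
    //             // ... write device output into desc.addr / desc.len ...
    //             bytes_written += desc.len;
    //         }
    //         queue.add_used(&mem, index, bytes_written);
    //     }
    //     queue.trigger_interrupt(&mem, &interrupt);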

    /// Asynchronously read the next descriptor chain from the queue.
    /// Returns a `DescriptorChain` when it is `await`ed.
    pub async fn next_async(
        &mut self,
        mem: &GuestMemory,
        eventfd: &mut EventAsync,
    ) -> std::result::Result<DescriptorChain, AsyncError> {
        loop {
            // Check if there are more descriptors available.
            if let Some(chain) = self.pop(mem) {
                return Ok(chain);
            }
            eventfd.next_val().await?;
        }
    }

    /// Puts an available descriptor head into the used ring for use by the guest.
    pub fn add_used(&mut self, mem: &GuestMemory, desc_index: u16, len: u32) {
        if desc_index >= self.size {
            error!(
                "attempted to add out of bounds descriptor to used ring: {}",
                desc_index
            );
            return;
        }

        let used_ring = self.used_ring;
        let next_used = self.wrap_queue_index(self.next_used) as usize;
        let used_elem = used_ring.unchecked_add((4 + next_used * 8) as u64);

        // These writes can't fail as we are guaranteed to be within the used ring.
        write_obj_at_addr_wrapper(mem, &self.exported_used_ring, desc_index as u32, used_elem)
            .unwrap();
        write_obj_at_addr_wrapper(
            mem,
            &self.exported_used_ring,
            len as u32,
            used_elem.unchecked_add(4),
        )
        .unwrap();

        self.next_used += Wrapping(1);
        self.set_used_index(mem, self.next_used);
    }

    /// Returns whether the queue should have an interrupt sent based on its state.
    ///
    /// This function implements `VIRTIO_RING_F_EVENT_IDX`, otherwise known as
    /// interrupt suppression. The virtio spec provides the driver with a field,
    /// `used_event`, which says that once we write that descriptor (or several
    /// in the case of a flurry of `add_used` calls), we should send a
    /// notification. Because the values involved wrap around `u16::MAX`, and to
    /// avoid checking the condition on every `add_used` call, the math is a
    /// little complicated.
    ///
    /// The critical inequality is:
    /// ```text
    /// (next_used - 1) - used_event < next_used - last_used
    /// ```
    ///
    /// For illustration purposes, we label it as `A < B`, where
    /// `A = (next_used - 1) - used_event`, and `B = next_used - last_used`.
    ///
    /// `A` and `B` represent two distances, measured in a wrapping ring of size
    /// `u16::MAX`. In the "send intr" case, the inequality is true. In the
    /// "don't send intr" case, the inequality is false. We must be very careful
    /// in assigning a direction to the ring, so that when we graph the
    /// subtraction operations, we are measuring the right distance
    /// (similar to how DC circuits are analyzed).
    ///
    /// The two distances are as follows:
    /// * `A` is the distance between the driver's requested notification
    ///   point, and the current position in the ring.
    ///
    /// * `B` is the distance between the last time we notified the guest,
    ///   and the current position in the ring.
    ///
    /// If we graph these distances for the situation where we want to notify
    /// the guest, and when we don't want to notify the guest, we see that
    /// `A < B` becomes true the moment `next_used - 1` passes `used_event`. See
    /// the graphs at the bottom of this comment block for a more visual
    /// explanation.
    ///
    /// Once an interrupt is sent, we have a final useful property: `last_used`
    /// moves up to `next_used`, which causes the inequality to be false. Thus,
    /// we won't send notifications again until `used_event` is moved forward
    /// by the driver.
    ///
    /// Finally, let's talk about a couple of ways to write this inequality
    /// that don't work, and critically, explain *why*.
    ///
    /// First, a naive reading of the virtio spec might lead us to ask: why not
    /// just use the following inequality:
    /// ```text
    /// next_used - 1 >= used_event
    /// ```
    ///
    /// because that's much simpler, right? The trouble is that the ring wraps,
    /// so it could be that a smaller index is actually ahead of a larger one.
    /// That's why we have to use distances in the ring instead.
    ///
    /// Second, one might look at the correct inequality:
    /// ```text
    /// (next_used - 1) - used_event < next_used - last_used
    /// ```
    ///
    /// And try to simplify it to:
    /// ```text
    /// last_used - 1 < used_event
    /// ```
    ///
    /// Functionally, this won't work because `next_used` isn't present at all
    /// anymore. (Notifications will never be sent.) But why is that? The
    /// algebra here *appears* to work out, but all semantic meaning is lost.
    /// There are two explanations for why this happens:
    /// * The intuitive one: the terms in the inequality are not actually
    ///   separable; in other words, `(next_used - last_used)` is an inseparable
    ///   term, so subtracting `next_used` from both sides of the original
    ///   inequality and zeroing them out is semantically invalid. But why
    ///   aren't they separable? See below.
    /// * The theoretical one: canceling like terms relies on a vector space
    ///   law: a + x = b + x => a = b (cancellation law). For congruences /
    ///   equality under modulo, this law is satisfied, but for inequalities
    ///   under mod, it is not; therefore, we cannot cancel like terms.
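    ///
    /// As a concrete (wrapping) example: suppose `used_event = 5`,
    /// `last_used = 5`, and the device has just bumped `next_used` to 7 after
    /// two `add_used` calls. Then `A = (7 - 1) - 5 = 1` and `B = 7 - 5 = 2`,
    /// so `A < B` holds and an interrupt is sent. If the driver had instead
    /// set `used_event = 9`, then `A = (7 - 1) - 9 = 0xfffd` after wrapping,
    /// which is not less than `B = 2`, so the interrupt is suppressed.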
    ///
    /// ```text
    /// ┌────────────────────────────────────────────────────┐
    /// │                                                    │
    /// │                                                    │
    /// │              ┌───────────── next_used - 1          │
    /// │              │A             x                      │
    /// │              │ ┌────────────x────────────┐         │
    /// │              │ │            x            │         │
    /// │              │ │                         │         │
    /// │              ▼ │                         │         │
    /// │  used_event xxxx                       xxxxx last_used
    /// │                │                         │         │
    /// │                │         Send intr       │         │
    /// │                │                         │         │
    /// │                └─────────────────────────┘         │
    /// │                                                    │
    /// │                          B                         │
    /// └────────────────────────────────────────────────────┘
    ///
    /// ┌────────────────────────────────────────────────────┐
    /// │                          A                         │
    /// │                                                    │
    /// │                ┌─────────────────────────┐         │
    /// │                │                         │         │
    /// │                │                         │         │
    /// │  used_event xxxx                       xxxxx last_used
    /// │                │                         │  │      │
    /// │                │     Don't send intr     │  │      │
    /// │                │                         │  │  B   │
    /// │                └────────────x────────────┘  │      │
    /// │                             x               │      │
    /// │                       next_used - 1 ◄───────┘      │
    /// │                                                    │
    /// └────────────────────────────────────────────────────┘
    /// ```
    fn queue_wants_interrupt(&self, mem: &GuestMemory) -> bool {
        if self.features & ((1u64) << VIRTIO_RING_F_EVENT_IDX) != 0 {
            let used_event = self.get_used_event(mem);
            self.next_used - used_event - Wrapping(1) < self.next_used - self.last_used
        } else {
            !self.get_avail_flag(mem, VIRTQ_AVAIL_F_NO_INTERRUPT)
        }
    }

    /// Injects an interrupt into the guest for this queue if one is needed.
    ///
    /// Returns `true` if an interrupt was injected for this queue, and `false` otherwise.
    pub fn trigger_interrupt<I: SignalableInterrupt>(
        &mut self,
        mem: &GuestMemory,
        interrupt: &I,
    ) -> bool {
        if self.queue_wants_interrupt(mem) {
            self.last_used = self.next_used;
            interrupt.signal_used_queue(self.vector);
            true
        } else {
            false
        }
    }

    /// Acknowledges that this set of features should be enabled on this queue.
    pub fn ack_features(&mut self, features: u64) {
        self.features |= features;
    }

    pub fn set_iommu(&mut self, iommu: Arc<Mutex<IpcMemoryMapper>>) {
        self.iommu = Some(iommu);
    }
}

#[cfg(test)]
mod tests {
    use std::convert::TryInto;

    use memoffset::offset_of;

    use super::super::Interrupt;
    use super::*;
    use crate::IrqLevelEvent;

    const GUEST_MEMORY_SIZE: u64 = 0x10000;
    const DESC_OFFSET: u64 = 0;
    const AVAIL_OFFSET: u64 = 0x200;
    const USED_OFFSET: u64 = 0x400;
    const QUEUE_SIZE: usize = 0x10;
    const BUFFER_OFFSET: u64 = 0x8000;
    const BUFFER_LEN: u32 = 0x400;

    #[derive(Copy, Clone, Debug, FromBytes, AsBytes)]
    #[repr(C)]
    struct Avail {
        flags: Le16,
        idx: Le16,
        ring: [Le16; QUEUE_SIZE],
        used_event: Le16,
    }

    impl Default for Avail {
        fn default() -> Self {
            Avail {
                flags: Le16::from(0u16),
                idx: Le16::from(0u16),
                ring: [Le16::from(0u16); QUEUE_SIZE],
                used_event: Le16::from(0u16),
            }
        }
    }

    #[derive(Copy, Clone, Debug, FromBytes, AsBytes)]
    #[repr(C)]
    struct UsedElem {
        id: Le32,
        len: Le32,
    }

    impl Default for UsedElem {
        fn default() -> Self {
            UsedElem {
                id: Le32::from(0u32),
                len: Le32::from(0u32),
            }
        }
    }

    #[derive(Copy, Clone, Debug, FromBytes, AsBytes)]
    #[repr(C, packed)]
    struct Used {
        flags: Le16,
        idx: Le16,
        used_elem_ring: [UsedElem; QUEUE_SIZE],
        avail_event: Le16,
    }

    impl Default for Used {
        fn default() -> Self {
            Used {
                flags: Le16::from(0u16),
                idx: Le16::from(0u16),
                used_elem_ring: [UsedElem::default(); QUEUE_SIZE],
                avail_event: Le16::from(0u16),
            }
        }
    }

    fn setup_vq(queue: &mut Queue, mem: &GuestMemory) {
        let desc = Desc {
            addr: Le64::from(BUFFER_OFFSET),
            len: Le32::from(BUFFER_LEN),
            flags: Le16::from(0u16),
            next: Le16::from(1u16),
        };
        let _ = mem.write_obj_at_addr(desc, GuestAddress(DESC_OFFSET));

        let avail = Avail::default();
        let _ = mem.write_obj_at_addr(avail, GuestAddress(AVAIL_OFFSET));

        let used = Used::default();
        let _ = mem.write_obj_at_addr(used, GuestAddress(USED_OFFSET));

        queue.desc_table = GuestAddress(DESC_OFFSET);
        queue.avail_ring = GuestAddress(AVAIL_OFFSET);
        queue.used_ring = GuestAddress(USED_OFFSET);
        queue.ack_features((1u64) << VIRTIO_RING_F_EVENT_IDX);
    }
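
    // Illustrative sketch of walking a two-descriptor chain directly through
    // `DescriptorChain::checked_new()`: one device-readable descriptor followed by
    // one device-writable descriptor, split with `readable()`/`writable()`. The
    // descriptor contents here are arbitrary test values.
    #[test]
    fn descriptor_chain_readable_writable_split() {
        let memory_start_addr = GuestAddress(0x0);
        let mem = GuestMemory::new(&[(memory_start_addr, GUEST_MEMORY_SIZE)]).unwrap();

        // Descriptor 0: device-readable, chained to descriptor 1.
        let desc0 = Desc {
            addr: Le64::from(BUFFER_OFFSET),
            len: Le32::from(BUFFER_LEN),
            flags: Le16::from(VIRTQ_DESC_F_NEXT),
            next: Le16::from(1u16),
        };
        // Descriptor 1: device-writable, end of the chain.
        let desc1 = Desc {
            addr: Le64::from(BUFFER_OFFSET + u64::from(BUFFER_LEN)),
            len: Le32::from(BUFFER_LEN),
            flags: Le16::from(VIRTQ_DESC_F_WRITE),
            next: Le16::from(0u16),
        };
        let _ = mem.write_obj_at_addr(desc0, GuestAddress(DESC_OFFSET));
        let _ = mem.write_obj_at_addr(desc1, GuestAddress(DESC_OFFSET + 16));

        let chain = DescriptorChain::checked_new(
            &mem,
            GuestAddress(DESC_OFFSET),
            QUEUE_SIZE.try_into().unwrap(),
            0,
            0,
            None,
            None,
        )
        .unwrap();
        assert!(chain.is_read_only());
        assert_eq!(chain.into_iter().readable().count(), 1);

        let chain = DescriptorChain::checked_new(
            &mem,
            GuestAddress(DESC_OFFSET),
            QUEUE_SIZE.try_into().unwrap(),
            0,
            0,
            None,
            None,
        )
        .unwrap();
        assert_eq!(chain.into_iter().writable().count(), 1);
    }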

    #[test]
    fn queue_event_id_guest_fast() {
        let mut queue = Queue::new(QUEUE_SIZE.try_into().unwrap());
        let memory_start_addr = GuestAddress(0x0);
        let mem = GuestMemory::new(&[(memory_start_addr, GUEST_MEMORY_SIZE)]).unwrap();
        setup_vq(&mut queue, &mem);

        let interrupt = Interrupt::new(IrqLevelEvent::new().unwrap(), None, 10);

        // Offset of used_event within the Avail structure.
        let used_event_offset = offset_of!(Avail, used_event) as u64;
        let used_event_address = GuestAddress(AVAIL_OFFSET + used_event_offset);

        // Assume the driver submits 0x100 requests to the device and the device has
        // handled them, so self.next_used is increased to 0x100.
        let mut device_generate: Wrapping<u16> = Wrapping(0x100);
        for _ in 0..device_generate.0 {
            queue.add_used(&mem, 0x0, BUFFER_LEN);
        }

        // At this moment the driver hasn't handled any interrupts yet, so an
        // interrupt should be injected.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true);

        // The driver handles all the interrupts and updates avail.used_event to 0x100.
        let mut driver_handled = device_generate;
        let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address);

        // At this moment the driver has handled all the interrupts and the device
        // hasn't generated more data, so no interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false);

        // Assume the driver submits another u16::MAX - 0x100 requests to the device and
        // the device has handled all of them, so self.next_used is increased to u16::MAX.
        for _ in device_generate.0..u16::max_value() {
            queue.add_used(&mem, 0x0, BUFFER_LEN);
        }
        device_generate = Wrapping(u16::max_value());

        // At this moment the driver has only handled 0x100 interrupts, so an
        // interrupt should be injected.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true);

        // The driver handles all the interrupts and updates avail.used_event to u16::MAX.
        driver_handled = device_generate;
        let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address);

        // At this moment the driver has handled all the interrupts and the device
        // hasn't generated more data, so no interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false);

        // Assume the driver submits another request and the device has handled it,
        // so self.next_used wraps around to 0.
        queue.add_used(&mem, 0x0, BUFFER_LEN);
        device_generate += Wrapping(1);

        // At this moment the driver has handled all the previous interrupts, so an
        // interrupt should be injected again.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true);

        // The driver handles that interrupt and updates avail.used_event to 0.
        driver_handled = device_generate;
        let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address);

        // At this moment the driver has handled all the interrupts and the device
        // hasn't generated more data, so no interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false);
    }
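
    // Illustrative sketch of the peek/pop path against the ring laid out by
    // `setup_vq()` above: the available ring's `idx` is advanced by hand (as the
    // driver would do) and the descriptor written by `setup_vq()` (BUFFER_OFFSET /
    // BUFFER_LEN, no flags) is expected to come back as a read-only chain.
    #[test]
    fn queue_pop_available_descriptor() {
        let mut queue = Queue::new(QUEUE_SIZE.try_into().unwrap());
        let memory_start_addr = GuestAddress(0x0);
        let mem = GuestMemory::new(&[(memory_start_addr, GUEST_MEMORY_SIZE)]).unwrap();
        setup_vq(&mut queue, &mem);
        queue.set_ready(true);

        // Nothing is available until the driver publishes an entry.
        assert!(queue.pop(&mem).is_none());

        // Publish descriptor index 0 (already present in ring[0]) by bumping avail.idx to 1.
        let _ = mem.write_obj_at_addr(Le16::from(1u16), GuestAddress(AVAIL_OFFSET + 2));

        let chain = queue.pop(&mem).expect("descriptor chain should be available");
        assert_eq!(chain.index, 0);
        assert_eq!(chain.addr, GuestAddress(BUFFER_OFFSET));
        assert_eq!(chain.len, BUFFER_LEN);
        assert!(chain.is_read_only());

        // The single published entry has been consumed.
        assert!(queue.pop(&mem).is_none());
    }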

    #[test]
    fn queue_event_id_guest_slow() {
        let mut queue = Queue::new(QUEUE_SIZE.try_into().unwrap());
        let memory_start_addr = GuestAddress(0x0);
        let mem = GuestMemory::new(&[(memory_start_addr, GUEST_MEMORY_SIZE)]).unwrap();
        setup_vq(&mut queue, &mem);

        let interrupt = Interrupt::new(IrqLevelEvent::new().unwrap(), None, 10);

        // Offset of used_event within the Avail structure.
        let used_event_offset = offset_of!(Avail, used_event) as u64;
        let used_event_address = GuestAddress(AVAIL_OFFSET + used_event_offset);

        // Assume the driver submits 0x100 requests to the device and the device has
        // handled them, so self.next_used is increased to 0x100.
        let mut device_generate: Wrapping<u16> = Wrapping(0x100);
        for _ in 0..device_generate.0 {
            queue.add_used(&mem, 0x0, BUFFER_LEN);
        }

        // At this moment the driver hasn't handled any interrupts yet, so an
        // interrupt should be injected.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true);

        // The driver handles part of the interrupts and updates avail.used_event to 0x80.
        let mut driver_handled = Wrapping(0x80);
        let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address);

        // At this moment the driver hasn't finished handling the last interrupt yet,
        // so no interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false);

        // Assume the driver submits another request and the device has handled it,
        // so self.next_used is incremented.
        queue.add_used(&mem, 0x0, BUFFER_LEN);
        device_generate += Wrapping(1);

        // At this moment the driver hasn't finished handling the last interrupt yet,
        // so no interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false);

        // Assume the driver submits another u16::MAX - 0x101 requests to the device and
        // the device has handled all of them, so self.next_used is increased to u16::MAX.
        for _ in device_generate.0..u16::max_value() {
            queue.add_used(&mem, 0x0, BUFFER_LEN);
        }
        device_generate = Wrapping(u16::max_value());

        // At this moment the driver hasn't finished handling the last interrupt yet,
        // so no interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false);

        // The driver handles most of the interrupts and updates avail.used_event to
        // u16::MAX - 1.
        driver_handled = device_generate - Wrapping(1);
        let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address);

        // Assume the driver submits another request and the device has handled it,
        // so self.next_used wraps around to 0.
        queue.add_used(&mem, 0x0, BUFFER_LEN);
        device_generate += Wrapping(1);

        // At this moment the driver has already finished the last interrupt (0x100)
        // and the device has serviced another request, so a new interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true);

        // Assume the driver submits another request and the device has handled it,
        // so self.next_used is incremented to 1.
        queue.add_used(&mem, 0x0, BUFFER_LEN);
        device_generate += Wrapping(1);

        // At this moment the driver hasn't finished handling the last interrupt
        // (Wrapping(0)) yet, so no interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false);

        // The driver handles all the remaining interrupts and wraps avail.used_event
        // around to 0x1.
        driver_handled = device_generate;
        let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address);

        // At this moment the driver has handled all the interrupts and the device
        // hasn't generated more data, so no interrupt is needed.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false);

        // Assume the driver submits another request and the device has handled it,
        // so self.next_used is incremented.
        queue.add_used(&mem, 0x0, BUFFER_LEN);
        device_generate += Wrapping(1);

        // At this moment the driver has finished all the previous interrupts, so an
        // interrupt should be injected again.
        assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true);
    }
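
    // Illustrative sketch of the `set_size()` contract: sizes larger than
    // `max_size` or not a power of two are ignored with a warning, while a
    // smaller power of two is accepted.
    #[test]
    fn queue_set_size_rejects_invalid_sizes() {
        let mut queue = Queue::new(QUEUE_SIZE.try_into().unwrap());
        assert_eq!(queue.size(), QUEUE_SIZE as u16);

        // Not a power of two: ignored.
        queue.set_size(3);
        assert_eq!(queue.size(), QUEUE_SIZE as u16);

        // Larger than max_size: ignored.
        queue.set_size((QUEUE_SIZE as u16) * 2);
        assert_eq!(queue.size(), QUEUE_SIZE as u16);

        // A smaller power of two is accepted.
        queue.set_size(8);
        assert_eq!(queue.size(), 8);
    }
}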