1 // Copyright 2019 The ChromiumOS Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 use std::convert::TryInto; 6 7 use anyhow::Context; 8 use base::error; 9 use base::info; 10 use base::AsRawDescriptor; 11 use base::Error as SysError; 12 use base::Event; 13 use base::RawDescriptor; 14 use base::Tube; 15 use base::TubeError; 16 use bit_field::*; 17 use remain::sorted; 18 use serde::Deserialize; 19 use serde::Serialize; 20 use snapshot::AnySnapshot; 21 use thiserror::Error; 22 use vm_control::VmIrqRequest; 23 use vm_control::VmIrqResponse; 24 use zerocopy::FromBytes; 25 use zerocopy::Immutable; 26 use zerocopy::IntoBytes; 27 use zerocopy::KnownLayout; 28 29 use crate::pci::pci_configuration::PciCapConfig; 30 use crate::pci::pci_configuration::PciCapConfigWriteResult; 31 use crate::pci::PciCapability; 32 use crate::pci::PciCapabilityID; 33 34 const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; 35 pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; 36 pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8; 37 pub const BITS_PER_PBA_ENTRY: usize = 64; 38 const FUNCTION_MASK_BIT: u16 = 0x4000; 39 const MSIX_ENABLE_BIT: u16 = 0x8000; 40 const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1; 41 42 #[derive(Serialize, Deserialize, Clone, Default)] 43 struct MsixTableEntry { 44 msg_addr_lo: u32, 45 msg_addr_hi: u32, 46 msg_data: u32, 47 vector_ctl: u32, 48 } 49 50 impl MsixTableEntry { masked(&self) -> bool51 fn masked(&self) -> bool { 52 self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT == MSIX_TABLE_ENTRY_MASK_BIT 53 } 54 } 55 56 struct IrqfdGsi { 57 irqfd: Event, 58 gsi: u32, 59 } 60 61 /// Wrapper over MSI-X Capability Structure and MSI-X Tables 62 pub struct MsixConfig { 63 table_entries: Vec<MsixTableEntry>, 64 pba_entries: Vec<u64>, 65 irq_vec: Vec<Option<IrqfdGsi>>, 66 masked: bool, 67 enabled: bool, 68 msi_device_socket: Tube, 69 msix_num: u16, 70 pci_id: u32, 71 device_name: String, 72 } 73 74 #[derive(Serialize, Deserialize)] 75 
struct MsixConfigSnapshot { 76 table_entries: Vec<MsixTableEntry>, 77 pba_entries: Vec<u64>, 78 /// Just like MsixConfig::irq_vec, but only the GSI. 79 irq_gsi_vec: Vec<Option<u32>>, 80 masked: bool, 81 enabled: bool, 82 msix_num: u16, 83 pci_id: u32, 84 device_name: String, 85 } 86 87 #[sorted] 88 #[derive(Error, Debug)] 89 pub enum MsixError { 90 #[error("AddMsiRoute failed: {0}")] 91 AddMsiRoute(SysError), 92 #[error("failed to receive AddMsiRoute response: {0}")] 93 AddMsiRouteRecv(TubeError), 94 #[error("failed to send AddMsiRoute request: {0}")] 95 AddMsiRouteSend(TubeError), 96 #[error("AllocateOneMsi failed: {0}")] 97 AllocateOneMsi(SysError), 98 #[error("failed to receive AllocateOneMsi response: {0}")] 99 AllocateOneMsiRecv(TubeError), 100 #[error("failed to send AllocateOneMsi request: {0}")] 101 AllocateOneMsiSend(TubeError), 102 #[error("failed to deserialize snapshot: {0}")] 103 DeserializationFailed(anyhow::Error), 104 #[error("invalid vector length in snapshot: {0}")] 105 InvalidVectorLength(std::num::TryFromIntError), 106 #[error("ReleaseOneIrq failed: {0}")] 107 ReleaseOneIrq(base::Error), 108 #[error("failed to receive ReleaseOneIrq response: {0}")] 109 ReleaseOneIrqRecv(TubeError), 110 #[error("failed to send ReleaseOneIrq request: {0}")] 111 ReleaseOneIrqSend(TubeError), 112 } 113 114 type MsixResult<T> = std::result::Result<T, MsixError>; 115 116 #[derive(Copy, Clone)] 117 pub enum MsixStatus { 118 Changed, 119 EntryChanged(usize), 120 NothingToDo, 121 } 122 123 impl PciCapConfigWriteResult for MsixStatus {} 124 125 impl MsixConfig { new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self126 pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self { 127 assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); 128 129 let mut table_entries: Vec<MsixTableEntry> = Vec::new(); 130 table_entries.resize_with(msix_vectors as usize, Default::default); 131 table_entries 132 .iter_mut() 133 
.for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT); 134 let mut pba_entries: Vec<u64> = Vec::new(); 135 let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY); 136 pba_entries.resize_with(num_pba_entries, Default::default); 137 138 let mut irq_vec = Vec::new(); 139 irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>); 140 141 MsixConfig { 142 table_entries, 143 pba_entries, 144 irq_vec, 145 masked: false, 146 enabled: false, 147 msi_device_socket: vm_socket, 148 msix_num: msix_vectors, 149 pci_id, 150 device_name, 151 } 152 } 153 154 /// Get the number of MSI-X vectors in this configuration. num_vectors(&self) -> u16155 pub fn num_vectors(&self) -> u16 { 156 self.msix_num 157 } 158 159 /// Check whether the Function Mask bit in Message Control word in set or not. 160 /// if 1, all of the vectors associated with the function are masked, 161 /// regardless of their per-vector Mask bit states. 162 /// If 0, each vector's Mask bit determines whether the vector is masked or not. masked(&self) -> bool163 pub fn masked(&self) -> bool { 164 self.masked 165 } 166 167 /// Check whether the Function Mask bit in MSIX table Message Control 168 /// word in set or not. 169 /// If true, the vector is masked. 170 /// If false, the vector is unmasked. table_masked(&self, index: usize) -> bool171 pub fn table_masked(&self, index: usize) -> bool { 172 if index >= self.table_entries.len() { 173 true 174 } else { 175 self.table_entries[index].masked() 176 } 177 } 178 179 /// Check whether the MSI-X Enable bit in Message Control word in set or not. 180 /// if 1, the function is permitted to use MSI-X to request service. enabled(&self) -> bool181 pub fn enabled(&self) -> bool { 182 self.enabled 183 } 184 185 /// Read the MSI-X Capability Structure. 186 /// The top 2 bits in Message Control word are emulated and all other 187 /// bits are read only. 
read_msix_capability(&self, data: u32) -> u32188 pub fn read_msix_capability(&self, data: u32) -> u32 { 189 let mut msg_ctl = (data >> 16) as u16; 190 msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT); 191 192 if self.enabled { 193 msg_ctl |= MSIX_ENABLE_BIT; 194 } 195 if self.masked { 196 msg_ctl |= FUNCTION_MASK_BIT; 197 } 198 (msg_ctl as u32) << 16 | (data & u16::MAX as u32) 199 } 200 201 /// Write to the MSI-X Capability Structure. 202 /// Only the top 2 bits in Message Control Word are writable. write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus203 pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus { 204 if offset == 2 && data.len() == 2 { 205 let reg = u16::from_le_bytes([data[0], data[1]]); 206 let old_masked = self.masked; 207 let old_enabled = self.enabled; 208 209 self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT; 210 self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT; 211 212 if !old_enabled && self.enabled { 213 if let Err(e) = self.msix_enable_all() { 214 error!("failed to enable MSI-X: {}", e); 215 self.enabled = false; 216 } 217 } 218 219 // If the Function Mask bit was set, and has just been cleared, it's 220 // important to go through the entire PBA to check if there was any 221 // pending MSI-X message to inject, given that the vector is not 222 // masked. 223 if old_masked && !self.masked { 224 for (index, entry) in self.table_entries.clone().iter().enumerate() { 225 if !entry.masked() && self.get_pba_bit(index as u16) == 1 { 226 self.inject_msix_and_clear_pba(index); 227 } 228 } 229 return MsixStatus::Changed; 230 } else if !old_masked && self.masked { 231 return MsixStatus::Changed; 232 } 233 } else { 234 error!( 235 "invalid write to MSI-X Capability Structure offset {:x}", 236 offset 237 ); 238 } 239 MsixStatus::NothingToDo 240 } 241 242 /// Create a snapshot of the current MsixConfig struct for use in 243 /// snapshotting. 
snapshot(&mut self) -> anyhow::Result<AnySnapshot>244 pub fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> { 245 AnySnapshot::to_any(MsixConfigSnapshot { 246 table_entries: self.table_entries.clone(), 247 pba_entries: self.pba_entries.clone(), 248 masked: self.masked, 249 enabled: self.enabled, 250 msix_num: self.msix_num, 251 pci_id: self.pci_id, 252 device_name: self.device_name.clone(), 253 irq_gsi_vec: self 254 .irq_vec 255 .iter() 256 .map(|irq_opt| irq_opt.as_ref().map(|irq| irq.gsi)) 257 .collect(), 258 }) 259 .context("failed to serialize MsixConfigSnapshot") 260 } 261 262 /// Restore a MsixConfig struct based on a snapshot. In short, this will 263 /// restore all data exposed via MMIO, and recreate all MSI-X vectors (they 264 /// will be re-wired to the irq chip). restore(&mut self, snapshot: AnySnapshot) -> MsixResult<()>265 pub fn restore(&mut self, snapshot: AnySnapshot) -> MsixResult<()> { 266 let snapshot: MsixConfigSnapshot = 267 AnySnapshot::from_any(snapshot).map_err(MsixError::DeserializationFailed)?; 268 269 self.table_entries = snapshot.table_entries; 270 self.pba_entries = snapshot.pba_entries; 271 self.masked = snapshot.masked; 272 self.enabled = snapshot.enabled; 273 self.msix_num = snapshot.msix_num; 274 self.pci_id = snapshot.pci_id; 275 self.device_name = snapshot.device_name; 276 277 self.msix_release_all()?; 278 self.irq_vec 279 .resize_with(snapshot.irq_gsi_vec.len(), || None::<IrqfdGsi>); 280 for (vector, gsi) in snapshot.irq_gsi_vec.iter().enumerate() { 281 if let Some(gsi_num) = gsi { 282 self.msix_restore_one(vector, *gsi_num)?; 283 } else { 284 info!( 285 "skipping restore of vector {} for device {}", 286 vector, self.device_name 287 ); 288 } 289 } 290 Ok(()) 291 } 292 293 /// Restore the specified MSI-X vector. 294 /// 295 /// Note: we skip the checks from [MsixConfig::msix_enable_one] because for 296 /// an interrupt to be present in [MsixConfigSnapshot::irq_gsi_vec], it must 297 /// have passed those checks. 
msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()>298 fn msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()> { 299 let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?; 300 let request = VmIrqRequest::AllocateOneMsiAtGsi { 301 irqfd, 302 gsi, 303 device_id: self.pci_id, 304 queue_id: index, 305 device_name: self.device_name.clone(), 306 }; 307 self.msi_device_socket 308 .send(&request) 309 .map_err(MsixError::AllocateOneMsiSend)?; 310 if let VmIrqResponse::Err(e) = self 311 .msi_device_socket 312 .recv() 313 .map_err(MsixError::AllocateOneMsiRecv)? 314 { 315 return Err(MsixError::AllocateOneMsi(e)); 316 }; 317 318 self.irq_vec[index] = Some(IrqfdGsi { 319 irqfd: match request { 320 VmIrqRequest::AllocateOneMsiAtGsi { irqfd, .. } => irqfd, 321 _ => unreachable!(), 322 }, 323 gsi, 324 }); 325 self.add_msi_route(index as u16, gsi)?; 326 Ok(()) 327 } 328 329 /// On warm restore, there could already be MSIs registered. We need to 330 /// release them in case the routing has changed (e.g. different 331 /// data <-> GSI). msix_release_all(&mut self) -> MsixResult<()>332 fn msix_release_all(&mut self) -> MsixResult<()> { 333 for irqfd_gsi in self.irq_vec.drain(..).flatten() { 334 let request = VmIrqRequest::ReleaseOneIrq { 335 gsi: irqfd_gsi.gsi, 336 irqfd: irqfd_gsi.irqfd, 337 }; 338 339 self.msi_device_socket 340 .send(&request) 341 .map_err(MsixError::ReleaseOneIrqSend)?; 342 if let VmIrqResponse::Err(e) = self 343 .msi_device_socket 344 .recv() 345 .map_err(MsixError::ReleaseOneIrqRecv)? 
346 { 347 return Err(MsixError::ReleaseOneIrq(e)); 348 } 349 } 350 Ok(()) 351 } 352 add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()>353 fn add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()> { 354 let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; 355 self.read_msix_table((index * 16).into(), data.as_mut()); 356 let msi_address: u64 = u64::from_le_bytes(data); 357 let mut data: [u8; 4] = [0, 0, 0, 0]; 358 self.read_msix_table((index * 16 + 8).into(), data.as_mut()); 359 let msi_data: u32 = u32::from_le_bytes(data); 360 361 if msi_address == 0 { 362 return Ok(()); 363 } 364 365 self.msi_device_socket 366 .send(&VmIrqRequest::AddMsiRoute { 367 gsi, 368 msi_address, 369 msi_data, 370 }) 371 .map_err(MsixError::AddMsiRouteSend)?; 372 if let VmIrqResponse::Err(e) = self 373 .msi_device_socket 374 .recv() 375 .map_err(MsixError::AddMsiRouteRecv)? 376 { 377 return Err(MsixError::AddMsiRoute(e)); 378 } 379 Ok(()) 380 } 381 382 // Enable MSI-X msix_enable_all(&mut self) -> MsixResult<()>383 fn msix_enable_all(&mut self) -> MsixResult<()> { 384 for index in 0..self.irq_vec.len() { 385 self.msix_enable_one(index)?; 386 } 387 Ok(()) 388 } 389 390 // Use a new MSI-X vector 391 // Create a new eventfd and bind them to a new msi msix_enable_one(&mut self, index: usize) -> MsixResult<()>392 fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> { 393 if self.irq_vec[index].is_some() 394 || !self.enabled() 395 || self.masked() 396 || self.table_masked(index) 397 { 398 return Ok(()); 399 } 400 let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?; 401 let request = VmIrqRequest::AllocateOneMsi { 402 irqfd, 403 device_id: self.pci_id, 404 queue_id: index, 405 device_name: self.device_name.clone(), 406 }; 407 self.msi_device_socket 408 .send(&request) 409 .map_err(MsixError::AllocateOneMsiSend)?; 410 let irq_num: u32 = match self 411 .msi_device_socket 412 .recv() 413 .map_err(MsixError::AllocateOneMsiRecv)? 
414 { 415 VmIrqResponse::AllocateOneMsi { gsi } => gsi, 416 VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)), 417 _ => unreachable!(), 418 }; 419 self.irq_vec[index] = Some(IrqfdGsi { 420 irqfd: match request { 421 VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd, 422 _ => unreachable!(), 423 }, 424 gsi: irq_num, 425 }); 426 427 self.add_msi_route(index as u16, irq_num)?; 428 Ok(()) 429 } 430 431 /// Read MSI-X table 432 /// # Arguments 433 /// * 'offset' - the offset within the MSI-X Table 434 /// * 'data' - used to store the read results 435 /// 436 /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full 437 /// DWORD or aligned full QWORD transactions; otherwise, the result is undefined. 438 /// 439 /// location: DWORD3 DWORD2 DWORD1 DWORD0 440 /// entry 0: Vector Control Msg Data Msg Upper Addr Msg Addr 441 /// entry 1: Vector Control Msg Data Msg Upper Addr Msg Addr 442 /// entry 2: Vector Control Msg Data Msg Upper Addr Msg Addr 443 /// ... 
read_msix_table(&self, offset: u64, data: &mut [u8])444 pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) { 445 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 446 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 447 448 if index >= self.table_entries.len() { 449 error!("invalid MSI-X table index {}", index); 450 return; 451 } 452 453 match data.len() { 454 4 => { 455 let value = match modulo_offset { 456 0x0 => self.table_entries[index].msg_addr_lo, 457 0x4 => self.table_entries[index].msg_addr_hi, 458 0x8 => self.table_entries[index].msg_data, 459 0xc => self.table_entries[index].vector_ctl, 460 _ => { 461 error!("invalid offset"); 462 0 463 } 464 }; 465 466 data.copy_from_slice(&value.to_le_bytes()); 467 } 468 8 => { 469 let value = match modulo_offset { 470 0x0 => { 471 (u64::from(self.table_entries[index].msg_addr_hi) << 32) 472 | u64::from(self.table_entries[index].msg_addr_lo) 473 } 474 0x8 => { 475 (u64::from(self.table_entries[index].vector_ctl) << 32) 476 | u64::from(self.table_entries[index].msg_data) 477 } 478 _ => { 479 error!("invalid offset"); 480 0 481 } 482 }; 483 484 data.copy_from_slice(&value.to_le_bytes()); 485 } 486 _ => error!("invalid data length"), 487 }; 488 } 489 490 /// Write to MSI-X table 491 /// 492 /// Message Address: the contents of this field specifies the address 493 /// for the memory write transaction; different MSI-X vectors have 494 /// different Message Address values 495 /// Message Data: the contents of this field specifies the data driven 496 /// on AD\[31::00\] during the memory write transaction's data phase. 497 /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit 498 /// is set, the function is prohibited from sending a message using 499 /// this MSI-X Table entry. 
write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus500 pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus { 501 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 502 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 503 504 if index >= self.table_entries.len() { 505 error!("invalid MSI-X table index {}", index); 506 return MsixStatus::NothingToDo; 507 } 508 509 // Store the value of the entry before modification 510 let old_entry = self.table_entries[index].clone(); 511 512 match data.len() { 513 4 => { 514 let value = u32::from_le_bytes(data.try_into().unwrap()); 515 match modulo_offset { 516 0x0 => self.table_entries[index].msg_addr_lo = value, 517 0x4 => self.table_entries[index].msg_addr_hi = value, 518 0x8 => self.table_entries[index].msg_data = value, 519 0xc => self.table_entries[index].vector_ctl = value, 520 _ => error!("invalid offset"), 521 }; 522 } 523 8 => { 524 let value = u64::from_le_bytes(data.try_into().unwrap()); 525 match modulo_offset { 526 0x0 => { 527 self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32; 528 self.table_entries[index].msg_addr_hi = (value >> 32) as u32; 529 } 530 0x8 => { 531 self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; 532 self.table_entries[index].vector_ctl = (value >> 32) as u32; 533 } 534 _ => error!("invalid offset"), 535 }; 536 } 537 _ => error!("invalid data length"), 538 }; 539 540 let new_entry = self.table_entries[index].clone(); 541 542 // This MSI-X vector is enabled for the first time. 
543 if self.enabled() 544 && !self.masked() 545 && self.irq_vec[index].is_none() 546 && old_entry.masked() 547 && !new_entry.masked() 548 { 549 if let Err(e) = self.msix_enable_one(index) { 550 error!("failed to enable MSI-X vector {}: {}", index, e); 551 self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT; 552 } 553 return MsixStatus::EntryChanged(index); 554 } 555 556 if self.enabled() 557 && (old_entry.msg_addr_lo != new_entry.msg_addr_lo 558 || old_entry.msg_addr_hi != new_entry.msg_addr_hi 559 || old_entry.msg_data != new_entry.msg_data) 560 { 561 if let Some(irqfd_gsi) = &self.irq_vec[index] { 562 let irq_num = irqfd_gsi.gsi; 563 if let Err(e) = self.add_msi_route(index as u16, irq_num) { 564 error!("add_msi_route failed: {}", e); 565 } 566 } 567 } 568 569 // After the MSI-X table entry has been updated, it is necessary to 570 // check if the vector control masking bit has changed. In case the 571 // bit has been flipped from 1 to 0, we need to inject a MSI message 572 // if the corresponding pending bit from the PBA is set. Once the MSI 573 // has been injected, the pending bit in the PBA needs to be cleared. 574 // All of this is valid only if MSI-X has not been masked for the whole 575 // device. 576 577 // Check if bit has been flipped 578 if !self.masked() { 579 if old_entry.masked() && !self.table_entries[index].masked() { 580 if self.get_pba_bit(index as u16) == 1 { 581 self.inject_msix_and_clear_pba(index); 582 } 583 return MsixStatus::EntryChanged(index); 584 } else if !old_entry.masked() && self.table_entries[index].masked() { 585 return MsixStatus::EntryChanged(index); 586 } 587 } 588 MsixStatus::NothingToDo 589 } 590 591 /// Read PBA Entries 592 /// # Arguments 593 /// * 'offset' - the offset within the PBA entries 594 /// * 'data' - used to store the read results 595 /// 596 /// Pending Bits\[63::00\]: For each Pending Bit that is set, the function 597 /// has a pending message for the associated MSI-X Table entry. 
read_pba_entries(&self, offset: u64, data: &mut [u8])598 pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) { 599 let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; 600 let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; 601 602 if index >= self.pba_entries.len() { 603 error!("invalid PBA index {}", index); 604 return; 605 } 606 607 match data.len() { 608 4 => { 609 let value: u32 = match modulo_offset { 610 0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32, 611 0x4 => (self.pba_entries[index] >> 32) as u32, 612 _ => { 613 error!("invalid offset"); 614 0 615 } 616 }; 617 618 data.copy_from_slice(&value.to_le_bytes()); 619 } 620 8 => { 621 let value: u64 = match modulo_offset { 622 0x0 => self.pba_entries[index], 623 _ => { 624 error!("invalid offset"); 625 0 626 } 627 }; 628 629 data.copy_from_slice(&value.to_le_bytes()); 630 } 631 _ => error!("invalid data length"), 632 } 633 } 634 635 /// Write to PBA Entries 636 /// 637 /// Software should never write, and should only read Pending Bits. 638 /// If software writes to Pending Bits, the result is undefined. 
write_pba_entries(&mut self, _offset: u64, _data: &[u8])639 pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) { 640 error!("Pending Bit Array is read only"); 641 } 642 set_pba_bit(&mut self, vector: u16, set: bool)643 fn set_pba_bit(&mut self, vector: u16, set: bool) { 644 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 645 646 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 647 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 648 let mut mask: u64 = (1 << shift) as u64; 649 650 if set { 651 self.pba_entries[index] |= mask; 652 } else { 653 mask = !mask; 654 self.pba_entries[index] &= mask; 655 } 656 } 657 get_pba_bit(&self, vector: u16) -> u8658 fn get_pba_bit(&self, vector: u16) -> u8 { 659 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 660 661 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 662 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 663 664 ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 665 } 666 inject_msix_and_clear_pba(&mut self, vector: usize)667 fn inject_msix_and_clear_pba(&mut self, vector: usize) { 668 if let Some(irq) = &self.irq_vec[vector] { 669 irq.irqfd.signal().unwrap(); 670 } 671 672 // Clear the bit from PBA 673 self.set_pba_bit(vector as u16, false); 674 } 675 676 /// Inject virtual interrupt to the guest 677 /// 678 /// # Arguments 679 /// * 'vector' - the index to the MSI-X Table entry 680 /// 681 /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is 682 /// prohibited from sending the associated message, and the function 683 /// must set the associated Pending bit whenever the function would 684 /// otherwise send the message. When software unmasks a vector whose 685 /// associated Pending bit is set, the function must schedule sending 686 /// the associated message, and clear the Pending bit as soon as the 687 /// message has been sent. 
///
    /// If the vector is unmasked, writing to irqfd which wakes up KVM to
    /// inject virtual interrupt to the guest.
    ///
    /// A `vector` that is not part of the MSI-X table is logged and ignored.
    pub fn trigger(&mut self, vector: u16) {
        let index = usize::from(vector);
        let entry_masked = match self.table_entries.get(index) {
            Some(entry) => entry.masked(),
            None => {
                error!("invalid MSI-X vector {}", vector);
                return;
            }
        };

        if entry_masked || self.masked() {
            // Masked: remember the interrupt until the vector is unmasked.
            self.set_pba_bit(vector, true);
        } else if let Some(irq) = self.irq_vec.get(index).and_then(Option::as_ref) {
            irq.irqfd.signal().unwrap();
        }
    }

    /// Return the raw descriptor of the MSI device socket
    pub fn get_msi_socket(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }

    /// Return irqfd of MSI-X Table entry
    ///
    /// # Arguments
    /// * 'vector' - the index to the MSI-X table entry
    ///
    /// Returns `None` when `vector` is out of range or has no irqfd yet.
    pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
        self.irq_vec.get(vector)?.as_ref().map(|irq| &irq.irqfd)
    }

    /// Release every allocated interrupt, last vector first, and empty the
    /// vector list. Failures to send or receive are ignored.
    pub fn destroy(&mut self) {
        for irq in self.irq_vec.drain(..).rev().flatten() {
            let request = VmIrqRequest::ReleaseOneIrq {
                gsi: irq.gsi,
                irqfd: irq.irqfd,
            };
            if self.msi_device_socket.send(&request).is_err() {
                continue;
            }
            let _ = self.msi_device_socket.recv::<VmIrqResponse>();
        }
    }
}

// Mask over the three capability DWORDs: only Message Control bits 15:14
// (bits 31:30 of the first DWORD) are set.
// NOTE(review): presumably these are the bits the caller takes from
// `read_reg` instead of the stored register value; confirm in PciCapConfig.
const MSIX_CONFIG_READ_MASK: [u32; 3] = [0xc000_0000, 0, 0];

impl PciCapConfig for MsixConfig {
    fn read_mask(&self) -> &'static [u32] {
        &MSIX_CONFIG_READ_MASK
    }

    fn read_reg(&self, reg_idx: usize) -> u32 {
        // Only the first DWORD carries emulated state (enable / function mask).
        match reg_idx {
            0 => self.read_msix_capability(0),
            _ => 0,
        }
    }

    fn write_reg(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Box<dyn PciCapConfigWriteResult>> {
        let status = match reg_idx {
            0 => self.write_msix_capability(offset, data),
            _ => MsixStatus::NothingToDo,
        };
        Some(Box::new(status))
    }
}

impl AsRawDescriptor for MsixConfig {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
}

/// Message Control Register
//   10-0:  MSI-X Table size
//   13-11: Reserved
//   14:    Mask. Mask all MSI-X when set.
//   15:    Enable. Enable all MSI-X when set.
// See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
#[bitfield]
#[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct MsixCtrl {
    // 11 bits, so that every table size up to MAX_MSIX_VECTORS_PER_DEVICE
    // (encoded as N - 1) fits.
    table_size: B11,
    reserved: B3,
    mask: B1,
    enable: B1,
}

#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
/// MSI-X Capability Structure
pub struct MsixCap {
    // To make add_capability() happy
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pba: u32,
}

impl PciCapability for MsixCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::Msix
    }

    fn writable_bits(&self) -> Vec<u32> {
        // Only msg_ctl[15:14] is writable. Message Control is the upper half
        // of the first DWORD, so these are bits 31:30.
        vec![0xc000_0000, 0, 0]
    }
}

impl MsixCap {
    /// Build the capability structure for a table of `table_size` vectors.
    ///
    /// # Arguments
    /// * 'table_pci_bar' - BAR holding the MSI-X table (low 3 bits are used)
    /// * 'table_size' - number of vectors in the table
    /// * 'table_off' - offset of the table in its BAR (low 3 bits are dropped)
    /// * 'pba_pci_bar' - BAR holding the PBA (low 3 bits are used)
    /// * 'pba_off' - offset of the PBA in its BAR (low 3 bits are dropped)
    ///
    /// # Panics
    /// Panics if `table_size` is zero or exceeds
    /// `MAX_MSIX_VECTORS_PER_DEVICE`.
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        // The size is stored as N - 1, so zero cannot be encoded.
        assert!(
            (1..=MAX_MSIX_VECTORS_PER_DEVICE).contains(&table_size),
            "invalid MSI-X table size {}",
            table_size
        );

        // Set the table size and enable MSI-X.
        let mut msg_ctl = MsixCtrl::new();
        msg_ctl.set_enable(1);
        // Table Size is N - 1 encoded.
        msg_ctl.set_table_size(table_size - 1);

        MsixCap {
            _cap_vndr: 0,
            _cap_next: 0,
            msg_ctl,
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }
}

#[cfg(test)]
mod tests {

    use std::thread;

    use super::*;

    #[track_caller]
    fn recv_allocate_msi(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AllocateOneMsiAtGsi { gsi, .. } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    struct MsiRouteDetails {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
    }

    #[track_caller]
    fn recv_add_msi_route(t: &Tube) -> MsiRouteDetails {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
            } => MsiRouteDetails {
                gsi,
                msi_address,
                msi_data,
            },
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    #[track_caller]
    fn recv_release_one_irq(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::ReleaseOneIrq { gsi, irqfd: _ } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    #[track_caller]
    fn send_ok(t: &Tube) {
        t.send(&VmIrqResponse::Ok).unwrap();
    }

    /// Tests a cold restore where there are no existing vectors at the time
    /// restore is called.
    #[test]
    fn verify_msix_restore_cold_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();
        let (_unused, unused_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, unused_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        let irqchip_fake = thread::spawn(move || {
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        let mut restored_cfg = MsixConfig::new(10, msix_config_tube, 10, "some_device".to_owned());
        restored_cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(restored_cfg.pci_id, 0);
        assert_eq!(restored_cfg.device_name, "test_device");
    }

    /// Tests a warm restore where there are existing vectors at the time
    /// restore is called. These vectors need to be released first.
    #[test]
    fn verify_msix_restore_warm_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, msix_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        let irqchip_fake = thread::spawn(move || {
            // First, we free the existing vectors / GSIs.
            assert_eq!(recv_release_one_irq(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(recv_release_one_irq(&irqchip_tube), 20);
            send_ok(&irqchip_tube);

            // Now we re-allocate them.
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(cfg.pci_id, 0);
        assert_eq!(cfg.device_name, "test_device");
    }
}